diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 9a686e0d94..dcbffade92 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -287,6 +287,7 @@ from calibre.ebooks.odt.input import ODTInput from calibre.ebooks.rtf.input import RTFInput from calibre.ebooks.html.input import HTMLInput from calibre.ebooks.comic.input import ComicInput +from calibre.web.feeds.input import RecipeInput from calibre.ebooks.oeb.output import OEBOutput from calibre.ebooks.epub.output import EPUBOutput from calibre.ebooks.txt.output import TXTOutput @@ -296,7 +297,7 @@ from calibre.customize.profiles import input_profiles, output_profiles plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput, TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput, - FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput] + FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput, RecipeInput] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ x.__name__.endswith('MetadataReader')] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index 941a1ec5fc..d8de702915 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -52,7 +52,7 @@ def check_command_line_options(parser, args, log): raise SystemExit(1) input = os.path.abspath(args[1]) - if not os.access(input, os.R_OK): + if not input.endswith('.recipe') and not os.access(input, os.R_OK): log.error('Cannot read from', input) raise SystemExit(1) @@ -169,6 +169,9 @@ def add_pipeline_options(parser, plumber): if rec.level < rec.HIGH: option_recommendation_to_cli_option(add_option, rec) + option_recommendation_to_cli_option(parser.add_option, + plumber.get_option_by_name('list_recipes')) + def option_parser(): return OptionParser(usage=USAGE) diff --git 
a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index da9c9f11e2..1ef58e1d95 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -360,6 +360,10 @@ OptionRecommendation(name='book_producer', OptionRecommendation(name='language', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the language.')), + +OptionRecommendation(name='list_recipes', + recommended_value=False, help=_('List available recipes.')), + ] input_fmt = os.path.splitext(self.input)[1] @@ -525,6 +529,13 @@ OptionRecommendation(name='language', self.setup_options() if self.opts.verbose: self.log.filter_level = self.log.DEBUG + if self.opts.list_recipes: + from calibre.web.feeds.recipes import titles + self.log('Available recipes:') + for title in sorted(titles): + self.log('\t'+title) + self.log('%d recipes available'%len(titles)) + raise SystemExit(0) # Run any preprocess plugins from calibre.customize.ui import run_plugins_on_preprocess @@ -535,8 +546,13 @@ OptionRecommendation(name='language', accelerators = {} tdir = PersistentTemporaryDirectory('_plumber') + stream = self.input if self.input_fmt == 'recipe' else \ + open(self.input, 'rb') - self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts, + if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf': + self.opts.lrf = True + + self.oeb = self.input_plugin(stream, self.opts, self.input_fmt, self.log, accelerators, tdir) if self.opts.debug_input is not None: diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 9d8598c766..f5395e04fe 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -1578,15 +1578,15 @@ class OEBBook(object): return data.decode('utf-16') except UnicodeDecodeError: pass - try: - return data.decode('utf-8') - except UnicodeDecodeError: - pass if self.encoding is not None: try: return data.decode(self.encoding) except UnicodeDecodeError: pass + 
try: + return data.decode('utf-8') + except UnicodeDecodeError: + pass data, _ = xml_to_unicode(data) data = data.replace('\r\n', '\n') data = data.replace('\r', '\n') diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index 21d71da5bb..ec3d63192d 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -59,6 +59,7 @@ class Split(object): self.fix_links() def split_item(self, item): + page_breaks, page_break_ids = [], [] if self.split_on_page_breaks: page_breaks, page_break_ids = self.find_page_breaks(item) diff --git a/src/calibre/web/__init__.py b/src/calibre/web/__init__.py index cadf21c39f..b14dc0ce28 100644 --- a/src/calibre/web/__init__.py +++ b/src/calibre/web/__init__.py @@ -2,5 +2,6 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' +class Recipe(object): + pass - \ No newline at end of file diff --git a/src/calibre/web/feeds/input.py b/src/calibre/web/feeds/input.py new file mode 100644 index 0000000000..21324293d3 --- /dev/null +++ b/src/calibre/web/feeds/input.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+
+class RecipeInput(InputFormatPlugin):
+    '''
+    Input plugin that builds a book by running a recipe (a builtin one or
+    one compiled from a recipe source file) rather than by reading an e-book.
+    '''
+
+    name = 'Recipe Input'
+    author = 'Kovid Goyal'
+    description = _('Download periodical content from the internet')
+    file_types = set(['recipe'])
+
+    recommendations = set([
+        ('chapter_mark', 'none', OptionRecommendation.HIGH),
+        ('dont_split_on_page_breaks', True, OptionRecommendation.HIGH),
+        ('use_auto_toc', False, OptionRecommendation.HIGH),
+        ])
+
+    options = set([
+        OptionRecommendation(name='test', recommended_value=False,
+            help=_('Useful for recipe development. Forces '
+                'max_articles_per_feed to 2 and downloads at most 2 feeds.')),
+        OptionRecommendation(name='username', recommended_value=None,
+            help=_('Username for sites that require a login to access '
+                'content.')),
+        OptionRecommendation(name='password', recommended_value=None,
+            help=_('Password for sites that require a login to access '
+                'content.')),
+        OptionRecommendation(name='lrf', recommended_value=False,
+            help='Optimize fetching for subsequent conversion to LRF.'),
+        ])
+
+    def convert(self, recipe_or_file, opts, file_ext, log,
+            accelerators, progress=lambda x, y: x):
+        '''
+        Run a recipe and return the OPF file it leaves behind.
+
+        :param recipe_or_file: Path to a readable recipe source file;
+            otherwise its base name, minus the extension, is looked up as
+            the title of a builtin recipe.
+        :param opts: Conversion options, handed to the recipe.
+        :param file_ext: Unused here.
+        :param log: Logger, handed to the recipe.
+        :param accelerators: Unused here.
+        :param progress: Progress callback, handed to the recipe.
+        :return: Absolute path of an ``.opf`` file in the current directory.
+        :raises ValueError: If no recipe could be resolved, or if no
+            ``.opf`` file exists once the download has finished.
+        '''
+        from calibre.web.feeds.recipes import \
+                get_builtin_recipe, compile_recipe
+        if os.access(recipe_or_file, os.R_OK):
+            # Close the source file deterministically instead of leaking it
+            with open(recipe_or_file, 'rb') as src:
+                recipe = compile_recipe(src.read())
+        else:
+            title = os.path.basename(recipe_or_file).rpartition('.')[0]
+            recipe = get_builtin_recipe(title)
+
+        if recipe is None:
+            raise ValueError('%s is not a valid recipe file or builtin recipe' %
+                    recipe_or_file)
+
+        ro = recipe(opts, log, progress)
+        ro.download()
+
+        # NOTE(review): presumably turns off flow splitting for the
+        # downloaded content -- confirm against the users of flow_size
+        opts.output_profile.flow_size = 0
+
+        for entry in os.listdir('.'):
+            if entry.endswith('.opf'):
+                return os.path.abspath(entry)
+
+        # Fail loudly rather than silently returning None to the caller
+        raise ValueError('No OPF file was produced by the recipe %s' %
+                recipe_or_file)
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 6a248b6992..216a827326 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -20,6 +20,7 @@ from calibre import browser, __appname__, iswindows, \ from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.lrf import entity_to_unicode +from calibre.web import Recipe from calibre.ebooks import render_html from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata import MetaInformation @@ -27,12 +28,11 @@ from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed from calibre.web.fetch.simple import option_parser as web2disk_option_parser from calibre.web.fetch.simple import
RecursiveFetcher from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending -from calibre.utils.logging import Log from calibre.ptempfile import PersistentTemporaryFile, \ PersistentTemporaryDirectory -class BasicNewsRecipe(object): +class BasicNewsRecipe(Recipe): ''' Abstract base class that contains logic needed in all feed fetchers. ''' @@ -443,40 +443,34 @@ class BasicNewsRecipe(object): ''' raise NotImplementedError - def __init__(self, options, parser, progress_reporter): + def __init__(self, options, log, progress_reporter): ''' Initialize the recipe. :param options: Parsed commandline options :param parser: Command line option parser. Used to intelligently merge options. :param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional. ''' - self.log = Log() - if options.verbose: - self.log.filter_level = self.log.DEBUG + self.log = log if not isinstance(self.title, unicode): self.title = unicode(self.title, 'utf-8', 'replace') - for attr in ('username', 'password', 'lrf', 'output_dir', 'verbose', 'debug', 'test'): - setattr(self, attr, getattr(options, attr)) + self.debug = options.verbose > 1 + self.output_dir = os.getcwd() + self.verbose = options.verbose + self.test = options.test + self.username = options.username + self.password = options.password + self.lrf = options.lrf + self.output_dir = os.path.abspath(self.output_dir) if options.test: self.max_articles_per_feed = 2 self.simultaneous_downloads = min(4, self.simultaneous_downloads) - if self.debug: self.verbose = True self.report_progress = progress_reporter - self.username = self.password = None - #: If True optimize downloading for eventual conversion to LRF - self.lrf = False - defaults = parser.get_default_values() - - for opt in options.__dict__.keys(): - if getattr(options, opt) != getattr(defaults, opt, None): - setattr(self, opt, getattr(options, opt)) - if isinstance(self.feeds, 
basestring): self.feeds = eval(self.feeds) if isinstance(self.feeds, basestring): @@ -493,7 +487,6 @@ class BasicNewsRecipe(object): '--timeout', str(self.timeout), '--max-recursions', str(self.recursions), '--delay', str(self.delay), - '--timeout', str(self.timeout), ] if self.encoding is not None: web2disk_cmdline.extend(['--encoding', self.encoding]) @@ -520,9 +513,6 @@ class BasicNewsRecipe(object): self.simultaneous_downloads = 1 self.navbar = templates.NavBarTemplate() - self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine', '--header', '--encoding', 'utf-8']) - if '--base-font-size' not in self.html2lrf_options: - self.html2lrf_options.extend(['--base-font-size', '12']) self.failed_downloads = [] self.partial_failures = [] @@ -557,7 +547,7 @@ class BasicNewsRecipe(object): return self.postprocess_html(soup, first_fetch) - def download(self, for_lrf=False): + def download(self): ''' Download and pre-process all articles from the feeds in this recipe. This method should be called only one on a particular Recipe instance.