News download: When using the debug pipeline options, create a zip file named periodical.downloaded_recipe in the debug directory. This can be passed to ebook-convert to directly convert a previous download into an e-book.

This commit is contained in:
Kovid Goyal 2010-02-23 12:33:25 -07:00
parent 1a4caa3801
commit 180308c7ce
3 changed files with 81 additions and 40 deletions

View File

@ -214,8 +214,21 @@ class InputFormatPlugin(Plugin):
return ret return ret
def postprocess_book(self, oeb, opts, log): def postprocess_book(self, oeb, opts, log):
'''
Called to allow the input plugin to perform postprocessing after
the book has been parsed.
'''
pass pass
def specialize(self, oeb, opts, log, output_fmt):
    '''
    Hook giving the input plugin a chance to tailor the parsed book
    to the output format that is being produced. It runs once
    postprocess_book has finished and before any transforms are
    applied to the parsed book. The default implementation is a no-op.
    '''
    return None
class OutputFormatPlugin(Plugin): class OutputFormatPlugin(Plugin):
''' '''
OutputFormatPlugins are responsible for converting an OEB document OutputFormatPlugins are responsible for converting an OEB document

View File

@ -13,6 +13,7 @@ from calibre.customize.ui import input_profiles, output_profiles, \
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.date import parse_date from calibre.utils.date import parse_date
from calibre.utils.zipfile import ZipFile
from calibre import extract, walk from calibre import extract, walk
DEBUG_README=u''' DEBUG_README=u'''
@ -726,6 +727,12 @@ OptionRecommendation(name='timestamp',
else: else:
os.makedirs(out_dir) os.makedirs(out_dir)
self.dump_oeb(ret, out_dir) self.dump_oeb(ret, out_dir)
if self.input_fmt == 'recipe':
zf = ZipFile(os.path.join(self.opts.debug_pipeline,
'periodical.downloaded_recipe'), 'w')
zf.add_dir(out_dir)
self.input_plugin.save_download(zf)
zf.close()
self.log.info('Input debug saved to:', out_dir) self.log.info('Input debug saved to:', out_dir)
@ -780,7 +787,7 @@ OptionRecommendation(name='timestamp',
self.dump_input(self.oeb, tdir) self.dump_input(self.oeb, tdir)
if self.abort_after_input_dump: if self.abort_after_input_dump:
return return
if self.input_fmt == 'recipe': if self.input_fmt in ('recipe', 'downloaded_recipe'):
self.opts_to_mi(self.user_metadata) self.opts_to_mi(self.user_metadata)
if not hasattr(self.oeb, 'manifest'): if not hasattr(self.oeb, 'manifest'):
self.oeb = create_oebbook(self.log, self.oeb, self.opts, self.oeb = create_oebbook(self.log, self.oeb, self.opts,
@ -793,6 +800,8 @@ OptionRecommendation(name='timestamp',
out_dir = os.path.join(self.opts.debug_pipeline, 'parsed') out_dir = os.path.join(self.opts.debug_pipeline, 'parsed')
self.dump_oeb(self.oeb, out_dir) self.dump_oeb(self.oeb, out_dir)
self.log('Parsed HTML written to:', out_dir) self.log('Parsed HTML written to:', out_dir)
self.input_plugin.specialize(self.oeb, self.opts, self.log,
self.output_fmt)
pr(0., _('Running transforms on ebook...')) pr(0., _('Running transforms on ebook...'))

View File

@ -19,7 +19,7 @@ class RecipeInput(InputFormatPlugin):
name = 'Recipe Input' name = 'Recipe Input'
author = 'Kovid Goyal' author = 'Kovid Goyal'
description = _('Download periodical content from the internet') description = _('Download periodical content from the internet')
file_types = set(['recipe']) file_types = set(['recipe', 'downloaded_recipe'])
recommendations = set([ recommendations = set([
('chapter', None, OptionRecommendation.HIGH), ('chapter', None, OptionRecommendation.HIGH),
@ -51,60 +51,79 @@ class RecipeInput(InputFormatPlugin):
def convert(self, recipe_or_file, opts, file_ext, log, def convert(self, recipe_or_file, opts, file_ext, log,
accelerators): accelerators):
from calibre.web.feeds.recipes import compile_recipe from calibre.web.feeds.recipes import compile_recipe
from calibre.web.feeds.recipes.collection import \ opts.output_profile.flow_size = 0
get_builtin_recipe_by_title if file_ext == 'downloaded_recipe':
if os.access(recipe_or_file, os.R_OK): from calibre.utils.zipfile import ZipFile
recipe = compile_recipe(open(recipe_or_file, 'rb').read()) zf = ZipFile(recipe_or_file, 'r')
zf.extractall()
zf.close()
self.recipe_source = open('download.recipe', 'rb').read()
recipe = compile_recipe(self.recipe_source)
self.recipe_object = recipe(opts, log, self.report_progress)
else: else:
title = getattr(opts, 'original_recipe_input_arg', recipe_or_file) if os.access(recipe_or_file, os.R_OK):
title = os.path.basename(title).rpartition('.')[0] self.recipe_source = open(recipe_or_file, 'rb').read()
raw = get_builtin_recipe_by_title(title, log=log, recipe = compile_recipe(self.recipe_source)
download_recipe=not opts.dont_download_recipe) else:
builtin = False from calibre.web.feeds.recipes.collection import \
try: get_builtin_recipe_by_title
recipe = compile_recipe(raw) title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
if recipe.requires_version > numeric_version: title = os.path.basename(title).rpartition('.')[0]
log.warn(
'Downloaded recipe needs calibre version at least: %s' % \
('.'.join(recipe.requires_version)))
builtin = True
except:
log.exception('Failed to compile downloaded recipe. Falling '
'back to builtin one')
builtin = True
if builtin:
raw = get_builtin_recipe_by_title(title, log=log, raw = get_builtin_recipe_by_title(title, log=log,
download_recipe=False) download_recipe=not opts.dont_download_recipe)
if raw is None: builtin = False
raise ValueError('Failed to find builtin recipe: '+title) try:
recipe = compile_recipe(raw) recipe = compile_recipe(raw)
self.recipe_source = raw
if recipe.requires_version > numeric_version:
log.warn(
'Downloaded recipe needs calibre version at least: %s' % \
('.'.join(recipe.requires_version)))
builtin = True
except:
log.exception('Failed to compile downloaded recipe. Falling '
'back to builtin one')
builtin = True
if builtin:
raw = get_builtin_recipe_by_title(title, log=log,
download_recipe=False)
if raw is None:
raise ValueError('Failed to find builtin recipe: '+title)
recipe = compile_recipe(raw)
self.recipe_source = raw
if recipe is None:
raise ValueError('%r is not a valid recipe file or builtin recipe' %
recipe_or_file)
ro = recipe(opts, log, self.report_progress)
disabled = getattr(ro, 'recipe_disabled', None)
if disabled is not None:
raise RecipeDisabled(disabled)
ro.download()
self.recipe_object = ro
if recipe is None:
raise ValueError('%r is not a valid recipe file or builtin recipe' %
recipe_or_file)
ro = recipe(opts, log, self.report_progress)
disabled = getattr(ro, 'recipe_disabled', None)
if disabled is not None:
raise RecipeDisabled(disabled)
ro.download()
self.recipe_object = ro
for key, val in recipe.conversion_options.items(): for key, val in recipe.conversion_options.items():
setattr(opts, key, val) setattr(opts, key, val)
opts.output_profile.flow_size = 0
for f in os.listdir('.'): for f in os.listdir('.'):
if f.endswith('.opf'): if f.endswith('.opf'):
return os.path.abspath(f) return os.path.abspath(f)
def postprocess_book(self, oeb, opts, log): def postprocess_book(self, oeb, opts, log):
if self.recipe_object is not None:
self.recipe_object.postprocess_book(oeb, opts, log)
def specialize(self, oeb, opts, log, output_fmt):
if opts.no_inline_navbars: if opts.no_inline_navbars:
from calibre.ebooks.oeb.base import XPath from calibre.ebooks.oeb.base import XPath
for item in oeb.spine: for item in oeb.spine:
for div in XPath('//h:div[contains(@class, "calibre_navbar")]')(item.data): for div in XPath('//h:div[contains(@class, "calibre_navbar")]')(item.data):
div.getparent().remove(div) div.getparent().remove(div)
self.recipe_object.postprocess_book(oeb, opts, log)
def save_download(self, zf):
    '''
    Store the source of the recipe used for this download in the zip
    file ``zf`` under the name ``download.recipe``.

    :param zf: An open, writable zip file object that provides a
               ``writestr(name, data)`` method.

    The source is read from ``self.recipe_source``. Text is encoded as
    UTF-8 before being written; byte strings are written unchanged.
    '''
    raw = self.recipe_source
    # Test against bytes instead of the Python 2 only name ``unicode``:
    # on Python 2.6+ bytes is str, so the behaviour there is unchanged,
    # and the check keeps working on interpreters without ``unicode``.
    if not isinstance(raw, bytes):
        raw = raw.encode('utf-8')
    zf.writestr('download.recipe', raw)