IGN:feeds2epub

This commit is contained in:
Kovid Goyal 2008-09-17 21:02:07 -07:00
parent e3dd51c3f8
commit b6a8c3d0a4
9 changed files with 136 additions and 23 deletions

View File

@ -55,6 +55,11 @@ help on using this feature.
''').replace('\n', ' '))
# 'none' has to be an accepted choice: the help text documents it and
# callers that want chapter marking disabled set chapter_mark to 'none'.
structure('chapter_mark', ['--chapter-mark'], choices=['pagebreak', 'rule', 'both', 'none'],
        default='pagebreak', help=_('Specify how to mark detected chapters. A value of "pagebreak" will insert page breaks before chapters. A value of "rule" will insert a line before chapters. A value of "none" will disable chapter marking and a value of "both" will use both page breaks and lines to mark chapters.'))
structure('cover', ['--cover'], default=None,
help=_('Path to the cover to be used for this book'))
structure('prefer_metadata_cover', ['--prefer-metadata-cover'], default=False,
action='store_true',
help=_('Use the cover detected from the source file in preference to the specified cover.'))
toc = c.add_group('toc',
_('''\

View File

@ -0,0 +1,68 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Convert periodical content into EPUB ebooks.
'''
import sys, glob, os
from calibre.web.feeds.main import config as feeds2disk_config, USAGE, run_recipe
from calibre.ebooks.epub.from_html import config as html2epub_config
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.epub.from_html import convert as html2epub
from calibre import strftime, sanitize_file_name
def config(defaults=None):
    '''
    Build the option set used by feeds2epub.

    Starts from the feeds2disk options, drops ``lrf``, ``epub`` and
    ``output_dir``, merges in the html2epub options and finally drops
    ``chapter_mark`` so that it is not exposed as a user option.
    '''
    conf = feeds2disk_config(defaults=defaults)
    for name in ('lrf', 'epub', 'output_dir'):
        conf.remove(name)
    conf.update(html2epub_config(defaults=defaults))
    conf.remove('chapter_mark')
    return conf
def option_parser():
    '''
    Return the command line option parser built from :func:`config`,
    using the ``USAGE`` text imported from the feeds module.
    '''
    return config().option_parser(usage=USAGE)
def convert(opts, recipe_arg, notification=None):
    '''
    Download the recipe `recipe_arg` into a temporary directory and convert
    the downloaded content to an EPUB file.

    :param opts: Options object, as produced by :func:`option_parser`. It is
                 modified in place (``lrf``, ``epub``, ``chapter_mark``,
                 ``output_dir`` and possibly ``verbose``).
    :param recipe_arg: Recipe argument handed to ``run_recipe``. May be None.
    :param notification: Passed through unchanged to ``run_recipe`` and
                         ``html2epub``.
    :raises Exception: If no OPF file is found in the download directory.
    '''
    opts.lrf = False
    opts.epub = True
    # Kept so the caller's opts object is modified exactly as before.
    opts.chapter_mark = 'none'
    if opts.debug:
        opts.verbose = 2
    parser = option_parser()
    with TemporaryDirectory('_feeds2epub') as tdir:
        opts.output_dir = tdir
        recipe = run_recipe(opts, recipe_arg, parser, notification=notification)
        c = config()
        # The options supplied by the recipe form the base; every non-default
        # value in the user's opts overrides them.
        recipe_opts = c.parse_string(recipe.html2epub_options)
        c.smart_update(recipe_opts, opts)
        opts = recipe_opts
        # config() removes chapter_mark, so neither parse_string nor
        # smart_update carries it over to the merged options. Force it on the
        # object that is actually handed to html2epub.
        opts.chapter_mark = 'none'
        opf = glob.glob(os.path.join(tdir, '*.opf'))
        if not opf:
            raise Exception('Downloading of recipe: %s failed'%recipe_arg)
        opf = opf[0]
        if opts.output is None:
            fname = recipe.title + strftime(recipe.timefmt) + '.epub'
            opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
        print('Generating epub...')
        html2epub(opf, opts, notification=notification)
def main(args=sys.argv):
    '''
    Command line entry point.

    Prints the help and returns 1 when there is not exactly one positional
    argument and ``--feeds`` was not given either. Otherwise runs
    :func:`convert` and returns 0.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)
    single_recipe_given = len(args) == 2
    if not single_recipe_given and opts.feeds is None:
        parser.print_help()
        return 1
    recipe_arg = None
    if len(args) > 1:
        recipe_arg = args[1]
    convert(opts, recipe_arg)
    return 0
# Run main() and use its return value as the exit status when this module
# is executed as a script.
if __name__ == '__main__':
    raise SystemExit(main())

View File

@ -97,14 +97,40 @@ def convert(htmlfile, opts, notification=None):
resource_map, htmlfile_map, generated_toc = parse_content(filelist, opts, tdir)
resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]
cover_src = None
if mi.cover and os.access(mi.cover, os.R_OK):
shutil.copyfile(mi.cover, os.path.join(tdir, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover)[1]))
cpath = os.path.join(tdir, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover)[1])
shutil.copyfile(opf.cover, cpath)
resources.append(cpath)
mi.cover = cpath
cover_src = mi.cover
else:
mi.cover = None
if opts.cover is not None and not opts.prefer_metadata_cover:
cover_src = opts.cover
if cover_src is not None:
cover_dest = os.path.join(tdir, 'content', 'resources', '_cover_'+os.path.splitext(cover_src)[1])
shutil.copyfile(cover_src, cover_dest)
mi.cover = cover_dest
resources.append(cover_dest)
spine = [htmlfile_map[f.path] for f in filelist]
if mi.cover:
cpath = '/'.join(('resources', os.path.basename(mi.cover)))
cover = '''\
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head><title>Cover Page</title></head>
<body>
<div style="text-align:center">
<img src="%s" alt="cover" />
</div>
</body>
</html>'''%cpath
cpath = os.path.join(tdir, 'content', 'calibre_cover_page.html')
with open(cpath, 'wb') as f:
f.write(cover)
spine[0:0] = [os.path.basename(cpath)]
mi.cover = None
mi.cover_data = (None, None)
mi = create_metadata(tdir, mi, spine, resources)
buf = cStringIO.StringIO()
if mi.toc:

View File

@ -667,7 +667,7 @@ def create_metadata(basepath, mi, filelist, resources):
Create an OPF metadata object with correct spine and manifest.
'''
mi = OPFCreator(basepath, mi)
entries = [('content/'+f, None) for f in filelist] + [(f, None) for f in resources]
entries = [('content/'+f, 'application/xhtml+xml') for f in filelist] + [(f, None) for f in resources]
mi.create_manifest(entries)
mi.create_spine(['content/'+f for f in filelist])
return mi

View File

@ -54,8 +54,6 @@ def main(args=sys.argv, notification=None, handler=None):
opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
print 'Generating LRF...'
process_file(htmlfile, opts)
if os.stat(opts.output).st_size < 100: # This can happen if the OS runs out of file handles
raise ConversionError(_('Failed to convert downloaded recipe: ')+recipe_arg)
return 0
if __name__ == '__main__':

View File

@ -36,6 +36,7 @@ entry_points = {
'web2disk = calibre.web.fetch.simple:main',
'feeds2disk = calibre.web.feeds.main:main',
'feeds2lrf = calibre.ebooks.lrf.feeds.convert_from:main',
'feeds2epub = calibre.ebooks.epub.from_feeds:main',
'web2lrf = calibre.ebooks.lrf.web.convert_from:main',
'pdf2lrf = calibre.ebooks.lrf.pdf.convert_from:main',
'mobi2lrf = calibre.ebooks.lrf.mobi.convert_from:main',
@ -173,6 +174,7 @@ def setup_completion(fatal_errors):
from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
from calibre.ebooks.epub.from_html import option_parser as html2epub
from calibre.ebooks.html import option_parser as html2oeb
from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub
f = open_file('/etc/bash_completion.d/libprs500')
f.close()
@ -210,6 +212,7 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
# Register the feeds2epub option parser under its own command name.
f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles))
f.write(opts_and_exts('html2epub', html2epub, ['html', 'htm', 'xhtm', 'xhtml', 'opf']))
f.write(opts_and_exts('html2oeb', html2oeb, ['html', 'htm', 'xhtm', 'xhtml']))
f.write('''

View File

@ -218,6 +218,15 @@ class OptionSet(object):
self.preferences.remove(pref)
self.preferences.append(pref)
def smart_update(self, opts1, opts2):
    '''
    Copy onto `opts1` the value of every preference whose value in `opts2`
    differs from that preference's default. A preference that is missing
    from `opts2` is treated as being at its default and is not copied.
    '''
    for pref in self.preferences:
        name, default = pref.name, pref.default
        value = getattr(opts2, name, default)
        if value != default:
            setattr(opts1, name, value)
def remove_opt(self, name):
if name in self.preferences:
self.preferences.remove(name)
@ -339,7 +348,8 @@ class ConfigInterface(object):
self.option_set = OptionSet(description=description)
self.add_opt = self.option_set.add_opt
self.add_group = self.option_set.add_group
self.remove_opt = self.option_set.remove_opt
self.remove_opt = self.remove = self.option_set.remove_opt
self.parse_string = self.option_set.parse_string
def update(self, other):
self.option_set.update(other.option_set)
@ -348,6 +358,9 @@ class ConfigInterface(object):
return self.option_set.option_parser(user_defaults=self.parse(),
usage=usage, gui_mode=gui_mode)
def smart_update(self, opts1, opts2):
    '''
    Delegate to the ``smart_update`` method of the underlying option set,
    passing `opts1` and `opts2` through unchanged.
    '''
    option_set = self.option_set
    option_set.smart_update(opts1, opts2)
class Config(ConfigInterface):
'''
A file based configuration.

View File

@ -30,31 +30,31 @@ def config(defaults=None):
web2disk('no_stylesheets', ['--dont-download-stylesheets'], action='store_true', default=False,
help=_('Do not download CSS stylesheets.'))
c.add_option('feeds', ['--feeds'], default=None,
c.add_opt('feeds', ['--feeds'], default=None,
help=_('''Specify a list of feeds to download. For example:
"['http://feeds.newsweek.com/newsweek/TopNews', 'http://feeds.newsweek.com/headlines/politics']"
If you specify this option, any argument to %prog is ignored and a default recipe is used to download the feeds.'''))
c.add_option('verbose', ['-v', '--verbose'], default=0, action='count',
c.add_opt('verbose', ['-v', '--verbose'], default=0, action='count',
help=_('''Be more verbose while processing.'''))
c.add_option('title', ['--title'], default=None,
c.add_opt('title', ['--title'], default=None,
help=_('The title for this recipe. Used as the title for any ebooks created from the downloaded feeds.'))
c.add_option('username', ['-u', '--username'], default=None,
c.add_opt('username', ['-u', '--username'], default=None,
help=_('Username for sites that require a login to access content.'))
c.add_option('password', ['-p', '--password'], default=None,
c.add_opt('password', ['-p', '--password'], default=None,
help=_('Password for sites that require a login to access content.'))
c.add_option('lrf', ['--lrf'], default=False, action='store_true',
c.add_opt('lrf', ['--lrf'], default=False, action='store_true',
help='Optimize fetching for subsequent conversion to LRF.')
c.add_option('epub', ['--epub'], default=False, action='store_true',
c.add_opt('epub', ['--epub'], default=False, action='store_true',
help='Optimize fetching for subsequent conversion to EPUB.')
c.add_option('recursions', ['--recursions'], default=0,
# Help text typo fixed ("Defaul" -> "Default"); this changes the translatable
# message id, so existing translations of this string need refreshing.
c.add_opt('recursions', ['--recursions'], default=0,
        help=_('Number of levels of links to follow on webpages that are linked to from feeds. Default %default'))
c.add_option('output_dir', ['--output-dir'], default='.',
c.add_opt('output_dir', ['--output-dir'], default='.',
help=_('The directory in which to store the downloaded feeds. Defaults to the current directory.'))
c.add_option('no_progress_bar', ['--no-progress-bar'], default=False, action='store_true',
c.add_opt('no_progress_bar', ['--no-progress-bar'], default=False, action='store_true',
help=_("Don't show the progress bar"))
c.add_option('debug', ['--debug'], action='store_true', default=False,
c.add_opt('debug', ['--debug'], action='store_true', default=False,
help=_('Very verbose output, useful for debugging.'))
c.add_option('test', ['--test'], action='store_true', default=False,
c.add_opt('test', ['--test'], action='store_true', default=False,
help=_('Useful for recipe development. Forces max_articles_per_feed to 2 and downloads at most 2 feeds.'))
return c

View File

@ -413,7 +413,7 @@ class BasicNewsRecipe(object, LoggingInterface):
defaults = parser.get_default_values()
for opt in options.__dict__.keys():
if getattr(options, opt) != getattr(defaults, opt):
if getattr(options, opt) != getattr(defaults, opt, None):
setattr(self, opt, getattr(options, opt))
if isinstance(self.feeds, basestring):