IGN:feeds2epub

2025-07-08 10:44:09 -04:00 · 2008-09-17 21:02:07 -07:00 · 2008-09-17 21:02:07 -07:00 · b6a8c3d0a4
commit b6a8c3d0a4
parent e3dd51c3f8
9 changed files with 136 additions and 23 deletions
--- a/src/calibre/ebooks/epub/init.py
+++ b/src/calibre/ebooks/epub/init.py
@ -55,6 +55,11 @@ help on using this feature.
 ''').replace('\n', ' '))
    structure('chapter_mark', ['--chapter-mark'], choices=['pagebreak', 'rule', 'both'],
              default='pagebreak', help=_('Specify how to mark detected chapters. A value of "pagebreak" will insert page breaks before chapters. A value of "rule" will insert a line before chapters. A value of "none" will disable chapter marking and a value of "both" will use both page breaks and lines to mark chapters.'))
+    structure('cover', ['--cover'], default=None,
+              help=_('Path to the cover to be used for this book'))
+    structure('prefer_metadata_cover', ['--prefer-metadata-cover'], default=False,
+              action='store_true',
+              help=_('Use the cover detected from the source file in preference to the specified cover.'))
    
    toc = c.add_group('toc', 
        _('''\
--- a/src/calibre/ebooks/epub/from_feeds.py
+++ b/src/calibre/ebooks/epub/from_feeds.py
@ -0,0 +1,68 @@
+from __future__ import with_statement
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+Convert periodical content into EPUB ebooks.
+'''
+import sys, glob, os
+from calibre.web.feeds.main import config as feeds2disk_config, USAGE, run_recipe
+from calibre.ebooks.epub.from_html import config as html2epub_config
+from calibre.ptempfile import TemporaryDirectory
+from calibre.ebooks.epub.from_html import convert as html2epub
+from calibre import strftime, sanitize_file_name
+
+def config(defaults=None):
+    '''feeds2disk options merged with html2epub options, minus those convert() sets itself.'''
+    merged = feeds2disk_config(defaults=defaults)
+    for name in ('lrf', 'epub', 'output_dir'):
+        merged.remove(name)
+    merged.update(html2epub_config(defaults=defaults))
+    merged.remove('chapter_mark')
+    return merged
+
+def option_parser():
+    '''Return the command line option parser built from config(), using USAGE as usage text.'''
+    return config().option_parser(usage=USAGE)
+
+def convert(opts, recipe_arg, notification=None):
+    '''
+    Download `recipe_arg` into a temporary directory and convert the OPF it
+    produces to EPUB. Non-default values in `opts` override recipe options.
+    Raises an Exception if the download leaves no OPF file behind.
+    '''
+    opts.lrf, opts.epub, opts.chapter_mark = False, True, 'none'
+    if opts.debug:
+        opts.verbose = 2
+    parser = option_parser()
+    with TemporaryDirectory('_feeds2epub') as tdir:
+        opts.output_dir = tdir
+        recipe = run_recipe(opts, recipe_arg, parser, notification=notification)
+        conf = config()
+        merged = conf.parse_string(recipe.html2epub_options)
+        conf.smart_update(merged, opts)
+        opts = merged
+        opfs = glob.glob(os.path.join(tdir, '*.opf'))
+        if not opfs:
+            raise Exception('Downloading of recipe: %s failed'%recipe_arg)
+        if opts.output is None:
+            fname = sanitize_file_name(recipe.title + strftime(recipe.timefmt) + '.epub')
+            opts.output = os.path.join(os.getcwd(), fname)
+        print 'Generating epub...'
+        html2epub(opfs[0], opts, notification=notification)
+    
+
+def main(args=sys.argv):
+    '''Command line entry point: returns 0 after converting, 1 after printing help.'''
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    if len(args) == 2 or opts.feeds is not None:
+        recipe_arg = args[1] if len(args) > 1 else None
+        convert(opts, recipe_arg)
+        return 0
+    parser.print_help()
+    return 1
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@ -97,14 +97,40 @@ def convert(htmlfile, opts, notification=None):
        resource_map, htmlfile_map, generated_toc = parse_content(filelist, opts, tdir)
        resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]
        
+        cover_src = None
        if mi.cover and os.access(mi.cover, os.R_OK):
-            shutil.copyfile(mi.cover, os.path.join(tdir, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover)[1]))
-            cpath = os.path.join(tdir, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover)[1])
-            shutil.copyfile(opf.cover, cpath)
-            resources.append(cpath)
-            mi.cover = cpath
+            cover_src = mi.cover
+        else:
+            mi.cover = None
+        if opts.cover is not None and (cover_src is None or not opts.prefer_metadata_cover):
+            cover_src = opts.cover  # also the fallback when the source has no usable cover
+        
+        if cover_src is not None:
+            cover_dest = os.path.join(tdir, 'content', 'resources', '_cover_'+os.path.splitext(cover_src)[1])
+            shutil.copyfile(cover_src, cover_dest)
+            mi.cover = cover_dest
+            resources.append(cover_dest)
            
        spine = [htmlfile_map[f.path] for f in filelist]
+        if mi.cover:
+            cpath = '/'.join(('resources', os.path.basename(mi.cover)))
+            cover = '''\
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+    <head><title>Cover Page</title></head>
+    <body>
+        <div style="text-align:center">
+            <img src="%s" alt="cover" />
+        </div>
+    </body>
+</html>'''%cpath
+            cpath = os.path.join(tdir, 'content', 'calibre_cover_page.html')
+            with open(cpath, 'wb') as f:
+                f.write(cover)
+            spine[0:0] = [os.path.basename(cpath)]
+            mi.cover = None
+            mi.cover_data = (None, None)
+            
+            
        mi = create_metadata(tdir, mi, spine, resources)
        buf = cStringIO.StringIO()
        if mi.toc:
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@ -667,7 +667,7 @@ def create_metadata(basepath, mi, filelist, resources):
    Create an OPF metadata object with correct spine and manifest.
    '''
    mi = OPFCreator(basepath, mi)
-    entries = [('content/'+f, None) for f in filelist] + [(f, None) for f in resources]
+    entries = [('content/'+f, 'application/xhtml+xml') for f in filelist] + [(f, None) for f in resources]
    mi.create_manifest(entries)
    mi.create_spine(['content/'+f for f in filelist])
    return mi
--- a/src/calibre/ebooks/lrf/feeds/convert_from.py
+++ b/src/calibre/ebooks/lrf/feeds/convert_from.py
@ -54,8 +54,6 @@ def main(args=sys.argv, notification=None, handler=None):
            opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
        print 'Generating LRF...'
        process_file(htmlfile, opts)
-        if os.stat(opts.output).st_size < 100: # This can happen if the OS runs out of file handles
-            raise ConversionError(_('Failed to convert downloaded recipe: ')+recipe_arg)
    return 0

 if __name__ == '__main__':
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -36,6 +36,7 @@ entry_points = {
                             'web2disk  = calibre.web.fetch.simple:main',
                             'feeds2disk = calibre.web.feeds.main:main',
                             'feeds2lrf = calibre.ebooks.lrf.feeds.convert_from:main',
+                             'feeds2epub = calibre.ebooks.epub.from_feeds:main',
                             'web2lrf   = calibre.ebooks.lrf.web.convert_from:main',
                             'pdf2lrf   = calibre.ebooks.lrf.pdf.convert_from:main',
                             'mobi2lrf  = calibre.ebooks.lrf.mobi.convert_from:main',
@ -173,6 +174,7 @@ def setup_completion(fatal_errors):
        from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
        from calibre.ebooks.epub.from_html import option_parser as html2epub
        from calibre.ebooks.html import option_parser as html2oeb
+        from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub 

        f = open_file('/etc/bash_completion.d/libprs500')
        f.close()
@ -210,6 +212,7 @@ def setup_completion(fatal_errors):
        f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
        f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
        f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
+        f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles))
        f.write(opts_and_exts('html2epub', html2epub, ['html', 'htm', 'xhtm', 'xhtml', 'opf']))
        f.write(opts_and_exts('html2oeb', html2oeb, ['html', 'htm', 'xhtm', 'xhtml']))
        f.write('''
--- a/src/calibre/utils/config.py
+++ b/src/calibre/utils/config.py
@ -218,6 +218,15 @@ class OptionSet(object):
                self.preferences.remove(pref)
            self.preferences.append(pref)
            
+    def smart_update(self, opts1, opts2):
+        '''Copy onto opts1 each preference whose value in opts2 is not its default.'''
+        for pref in self.preferences:
+            name, default = pref.name, pref.default
+            # A preference missing from opts2 counts as being at its default.
+            value = getattr(opts2, name, default)
+            if value != default:
+                setattr(opts1, name, value)
+            
    def remove_opt(self, name):
        if name in self.preferences:
            self.preferences.remove(name)
@ -339,7 +348,8 @@ class ConfigInterface(object):
        self.option_set       = OptionSet(description=description)
        self.add_opt          = self.option_set.add_opt
        self.add_group        = self.option_set.add_group
-        self.remove_opt       = self.option_set.remove_opt
+        self.remove_opt       = self.remove = self.option_set.remove_opt
+        self.parse_string     = self.option_set.parse_string
        
    def update(self, other):
        self.option_set.update(other.option_set)
@ -348,6 +358,9 @@ class ConfigInterface(object):
        return self.option_set.option_parser(user_defaults=self.parse(), 
                                             usage=usage, gui_mode=gui_mode)
    
+    def smart_update(self, opts1, opts2):  # thin delegate to the wrapped OptionSet
+        self.option_set.smart_update(opts1, opts2)  # sets non-default opts2 values on opts1
+    
 class Config(ConfigInterface):
    '''
    A file based configuration.
--- a/src/calibre/web/feeds/main.py
+++ b/src/calibre/web/feeds/main.py
@ -30,31 +30,31 @@ def config(defaults=None):
    web2disk('no_stylesheets', ['--dont-download-stylesheets'], action='store_true', default=False,
              help=_('Do not download CSS stylesheets.'))
    
-    c.add_option('feeds', ['--feeds'], default=None,
+    c.add_opt('feeds', ['--feeds'], default=None,
                 help=_('''Specify a list of feeds to download. For example: 
 "['http://feeds.newsweek.com/newsweek/TopNews', 'http://feeds.newsweek.com/headlines/politics']"
 If you specify this option, any argument to %prog is ignored and a default recipe is used to download the feeds.'''))
-    c.add_option('verbose', ['-v', '--verbose'], default=0, action='count',
+    c.add_opt('verbose', ['-v', '--verbose'], default=0, action='count',
                 help=_('''Be more verbose while processing.'''))
-    c.add_option('title', ['--title'], default=None,
+    c.add_opt('title', ['--title'], default=None,
                 help=_('The title for this recipe. Used as the title for any ebooks created from the downloaded feeds.'))
-    c.add_option('username', ['-u', '--username'], default=None, 
+    c.add_opt('username', ['-u', '--username'], default=None, 
                 help=_('Username for sites that require a login to access content.'))
-    c.add_option('password', ['-p', '--password'], default=None, 
+    c.add_opt('password', ['-p', '--password'], default=None, 
                 help=_('Password for sites that require a login to access content.'))
-    c.add_option('lrf', ['--lrf'], default=False, action='store_true', 
+    c.add_opt('lrf', ['--lrf'], default=False, action='store_true', 
                 help='Optimize fetching for subsequent conversion to LRF.')
-    c.add_option('epub', ['--epub'], default=False, action='store_true', 
+    c.add_opt('epub', ['--epub'], default=False, action='store_true', 
                 help='Optimize fetching for subsequent conversion to EPUB.')
-    c.add_option('recursions', ['--recursions'], default=0,
+    c.add_opt('recursions', ['--recursions'], default=0,
                 help=_('Number of levels of links to follow on webpages that are linked to from feeds. Defaul %default'))
-    c.add_option('output_dir', ['--output-dir'], default='.', 
+    c.add_opt('output_dir', ['--output-dir'], default='.', 
                 help=_('The directory in which to store the downloaded feeds. Defaults to the current directory.'))
-    c.add_option('no_progress_bar', ['--no-progress-bar'], default=False, action='store_true',
+    c.add_opt('no_progress_bar', ['--no-progress-bar'], default=False, action='store_true',
                 help=_("Don't show the progress bar"))
-    c.add_option('debug', ['--debug'], action='store_true', default=False,
+    c.add_opt('debug', ['--debug'], action='store_true', default=False,
                 help=_('Very verbose output, useful for debugging.'))
-    c.add_option('test', ['--test'], action='store_true', default=False, 
+    c.add_opt('test', ['--test'], action='store_true', default=False, 
                 help=_('Useful for recipe development. Forces max_articles_per_feed to 2 and downloads at most 2 feeds.'))
    
    return c
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@ -413,7 +413,7 @@ class BasicNewsRecipe(object, LoggingInterface):
        defaults = parser.get_default_values()
        
        for opt in options.__dict__.keys():
-            if getattr(options, opt) != getattr(defaults, opt):
+            if getattr(options, opt) != getattr(defaults, opt, None):
                setattr(self, opt, getattr(options, opt))
        
        if isinstance(self.feeds, basestring):