Implement feeds2mobi

This commit is contained in:
Kovid Goyal 2009-01-20 12:41:39 -08:00
parent b3271a0ded
commit 3ffa6bb88f
9 changed files with 136 additions and 40 deletions

View File

@ -20,6 +20,7 @@ import mechanize
mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
mimetypes.add_type('application/x-sony-bbeb', '.lrf')
mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
def to_unicode(raw, encoding='utf-8', errors='strict'):
if isinstance(raw, unicode):

View File

@ -160,7 +160,11 @@ class HTMLProcessor(Processor, Rationalizer):
br.text = u'\u00a0'
if self.opts.profile.remove_object_tags:
for tag in self.root.xpath('//object|//embed'):
for tag in self.root.xpath('//embed'):
tag.getparent().remove(tag)
for tag in self.root.xpath('//object'):
if tag.get('type', '').lower().strip() in ('image/svg+xml',):
continue
tag.getparent().remove(tag)
def save(self):

View File

@ -14,15 +14,18 @@ import sys, os, glob, logging
from calibre.ebooks.epub.from_any import any2epub, formats, USAGE
from calibre.ebooks.epub import config as common_config
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.mobi.writer import oeb2mobi, add_mobi_options
from calibre.ebooks.mobi.writer import oeb2mobi, config as mobi_config
def config(defaults=None):
return common_config(defaults=defaults, name='mobi')
c = common_config(defaults=defaults, name='mobi')
c.remove_opt('profile')
mobic = mobi_config(defaults=defaults)
c.update(mobic)
return c
def option_parser(usage=USAGE):
usage = usage % ('Mobipocket', formats())
parser = config().option_parser(usage=usage)
add_mobi_options(parser)
return parser
def any2mobi(opts, path):

View File

@ -0,0 +1,74 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Convert feeds to MOBI ebook
'''
import sys, glob, os
from calibre.web.feeds.main import config as feeds2disk_config, USAGE, run_recipe
from calibre.ebooks.mobi.writer import config as oeb2mobi_config, oeb2mobi
from calibre.ptempfile import TemporaryDirectory
from calibre import strftime, sanitize_file_name
def config(defaults=None):
    '''
    Build the option set used by feeds2mobi.

    Starts from the feeds2disk options, removes the options that
    :func:`convert` assigns itself, merges in the oeb2mobi options and
    finally adds an ``--output`` option.

    :param defaults: Passed through unchanged to both ``feeds2disk_config``
                     and ``oeb2mobi_config``.
    :return: The combined configuration object.
    '''
    combined = feeds2disk_config(defaults=defaults)

    # Format flags and the output directory are set directly by convert().
    for name in ('lrf', 'epub', 'mobi', 'output_dir'):
        combined.remove(name)

    combined.update(oeb2mobi_config(defaults=defaults))

    # Removed only after the merge; convert() assigns both of these itself.
    for name in ('encoding', 'source_profile'):
        combined.remove(name)

    combined.add_opt(
        'output', ['-o', '--output'], default=None,
        help=_('Output file. Default is derived from input filename.'))
    return combined
def option_parser():
    '''
    Return the command line option parser for feeds2mobi.

    The parser is created from :func:`config` with the ``USAGE`` text
    imported from ``calibre.web.feeds.main``.
    '''
    return config().option_parser(usage=USAGE)
def convert(opts, recipe_arg, notification=None):
    '''
    Run a recipe into a temporary directory and convert the result to MOBI.

    :param opts: Parsed options. Modified in place before the recipe runs
                 (``lrf``, ``epub``, ``mobi``, ``output_dir`` and, when
                 ``opts.debug`` is set, ``verbose``).
    :param recipe_arg: Recipe argument handed to ``run_recipe``; also used
                       in the error message.
    :param notification: Passed through to ``run_recipe``.
    :raises Exception: If no ``*.opf`` file exists in the temporary
                       directory after the recipe has run.
    '''
    opts.lrf, opts.epub, opts.mobi = False, False, True
    if opts.debug:
        opts.verbose = 2
    parser = option_parser()

    with TemporaryDirectory('_feeds2mobi') as workdir:
        opts.output_dir = workdir
        recipe = run_recipe(opts, recipe_arg, parser,
                            notification=notification)

        # Options declared by the recipe are parsed first and then combined
        # with the caller's options; the combined object replaces ``opts``.
        # NOTE(review): precedence is decided by smart_update -- presumably
        # explicit command line values win; confirm against its definition.
        cfg = config()
        merged_opts = cfg.parse_string(recipe.oeb2mobi_options)
        cfg.smart_update(merged_opts, opts)
        opts = merged_opts

        opf_files = glob.glob(os.path.join(workdir, '*.opf'))
        if not opf_files:
            raise Exception('Downloading of recipe: %s failed'%recipe_arg)
        opf_path = opf_files[0]

        if opts.output is None:
            # Default file name: recipe title followed by the formatted
            # timestamp, placed in the current working directory.
            title = recipe.title
            stamp = strftime(recipe.timefmt)
            fname = title + stamp + '.mobi'
            opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))

        print('Generating MOBI...')
        # These two options were removed from the command line in config()
        # and are fixed here instead.
        opts.encoding = 'utf-8'
        opts.source_profile = 'Browser'
        oeb2mobi(opts, opf_path)
def main(args=sys.argv, notification=None, handler=None):
    '''
    Command line entry point for feeds2mobi.

    :param args: Argument list including the program name.
    :param notification: Passed through to :func:`convert`.
    :param handler: Not used by this function.
    :return: 1 if the usage help was printed (argument count is not two
             and ``opts.feeds`` is None), otherwise 0 after conversion.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)

    wrong_arg_count = len(args) != 2
    if wrong_arg_count and opts.feeds is None:
        parser.print_help()
        return 1

    # With opts.feeds set there may be no positional recipe argument at all.
    if len(args) > 1:
        recipe_arg = args[1]
    else:
        recipe_arg = None

    convert(opts, recipe_arg, notification=notification)
    return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -34,8 +34,7 @@ from calibre.ebooks.mobi.palmdoc import compress_doc
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer
from calibre.customize.ui import run_plugins_on_postprocess
from calibre.utils.config import OptionParser
from optparse import OptionGroup
from calibre.utils.config import Config, StringConfig
# TODO:
# - Allow override CSS (?)
@ -502,44 +501,45 @@ class MobiWriter(object):
self._write(record)
def add_mobi_options(parser):
profiles = Context.PROFILES.keys()
profiles.sort()
profiles = ', '.join(profiles)
group = OptionGroup(parser, _('Mobipocket'),
_('Mobipocket-specific options.'))
group.add_option(
'-c', '--compress', default=False, action='store_true',
help=_('Compress file text using PalmDOC compression. '
def config(defaults=None):
desc = _('Options to control the conversion to MOBI')
_profiles = list(sorted(Context.PROFILES.keys()))
if defaults is None:
c = Config('mobi', desc)
else:
c = StringConfig(defaults, desc)
mobi = c.add_group('mobipocket', _('Mobipocket-specific options.'))
mobi('compress', ['--compress'], default=False,
help=_('Compress file text using PalmDOC compression. '
'Results in smaller files, but takes a long time to run.'))
group.add_option(
'-r', '--rescale-images', default=False, action='store_true',
mobi('rescale_images', ['--rescale-images'], default=False,
help=_('Modify images to meet Palm device size limitations.'))
group.add_option(
'--toc-title', default=None, action='store',
help=_('Title for any generated in-line table of contents.'))
parser.add_option_group(group)
group = OptionGroup(parser, _('Profiles'), _('Device renderer profiles. '
'Affects conversion of default font sizes and rasterization '
'resolution. Valid profiles are: %s.') % profiles)
group.add_option(
'--source-profile', default='Browser', metavar='PROFILE',
help=_("Source renderer profile. Default is 'Browser'."))
group.add_option(
'--dest-profile', default='CybookG3', metavar='PROFILE',
help=_("Destination renderer profile. Default is 'CybookG3'."))
parser.add_option_group(group)
return
mobi('toc_title', ['--toc-title'], default=None,
help=_('Title for any generated in-line table of contents.'))
profiles = c.add_group('profiles', _('Device renderer profiles. '
'Affects conversion of font sizes, image rescaling and rasterization '
'of tables. Valid profiles are: %s.') % ', '.join(_profiles))
profiles('source_profile', ['--source-profile'],
default='Browser', choices=_profiles,
help=_("Source renderer profile. Default is %default."))
profiles('dest_profile', ['--dest-profile'],
default='CybookG3', choices=_profiles,
help=_("Destination renderer profile. Default is %default."))
c.add_opt('encoding', ['--encoding'], default=None,
help=_('Character encoding for HTML files. Default is to auto detect.'))
return c
def option_parser():
parser = OptionParser(usage=_('%prog [options] OPFFILE'))
c = config()
parser = c.option_parser(usage='%prog '+_('[options]')+' file.opf')
parser.add_option(
'-o', '--output', default=None,
help=_('Output file. Default is derived from input filename.'))
parser.add_option(
'-v', '--verbose', default=0, action='count',
help=_('Useful for debugging.'))
add_mobi_options(parser)
return parser
def oeb2mobi(opts, inpath):

View File

@ -223,8 +223,11 @@ class Stylizer(object):
for key in composition:
style[key] = 'inherit'
else:
primitives = [v.cssText for v in cssvalue]
primitites.reverse()
try:
primitives = [v.cssText for v in cssvalue]
except TypeError:
primitives = [cssvalue.cssText]
primitives.reverse()
value = primitives.pop()
for key in composition:
if cssproperties.cssvalues[key](value):

View File

@ -40,6 +40,7 @@ entry_points = {
'calibre-server = calibre.library.server:main',
'feeds2lrf = calibre.ebooks.lrf.feeds.convert_from:main',
'feeds2epub = calibre.ebooks.epub.from_feeds:main',
'feeds2mobi = calibre.ebooks.mobi.from_feeds:main',
'web2lrf = calibre.ebooks.lrf.web.convert_from:main',
'pdf2lrf = calibre.ebooks.lrf.pdf.convert_from:main',
'mobi2lrf = calibre.ebooks.lrf.mobi.convert_from:main',
@ -189,6 +190,7 @@ def setup_completion(fatal_errors):
from calibre.ebooks.html import option_parser as html2oeb
from calibre.ebooks.odt.to_oeb import option_parser as odt2oeb
from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub
from calibre.ebooks.mobi.from_feeds import option_parser as feeds2mobi
from calibre.ebooks.epub.from_any import option_parser as any2epub
from calibre.ebooks.lit.from_any import option_parser as any2lit
from calibre.ebooks.epub.from_comic import option_parser as comic2epub
@ -219,7 +221,7 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('any2epub', any2epub, any_formats))
f.write(opts_and_exts('any2lit', any2lit, any_formats))
f.write(opts_and_exts('any2mobi', any2mobi, any_formats))
f.write(opts_and_exts('oeb2mobi', oeb2mobi, ['mobi', 'prc']))
f.write(opts_and_exts('oeb2mobi', oeb2mobi, ['opf']))
f.write(opts_and_exts('lrf2lrs', lrf2lrsop, ['lrf']))
f.write(opts_and_exts('lrf-meta', metaop, ['lrf']))
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
@ -239,7 +241,8 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr']))
f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
f.write(opts_and_words('feeds2lrf', feeds2epub, feed_titles))
f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles))
f.write(opts_and_words('feeds2mobi', feeds2mobi, feed_titles))
f.write(opts_and_exts('html2epub', html2epub, ['html', 'htm', 'xhtm', 'xhtml', 'opf']))
f.write(opts_and_exts('html2oeb', html2oeb, ['html', 'htm', 'xhtm', 'xhtml']))
f.write(opts_and_exts('odt2oeb', odt2oeb, ['odt']))

View File

@ -45,6 +45,8 @@ If you specify this option, any argument to %prog is ignored and a default recip
help='Optimize fetching for subsequent conversion to LRF.')
c.add_opt('epub', ['--epub'], default=False, action='store_true',
help='Optimize fetching for subsequent conversion to EPUB.')
c.add_opt('mobi', ['--mobi'], default=False, action='store_true',
help='Optimize fetching for subsequent conversion to MOBI.')
c.add_opt('recursions', ['--recursions'], default=0,
help=_('Number of levels of links to follow on webpages that are linked to from feeds. Defaul %default'))
c.add_opt('output_dir', ['--output-dir'], default='.',

View File

@ -20,7 +20,7 @@ from PyQt4.QtWebKit import QWebPage
from calibre import browser, __appname__, iswindows, LoggingInterface, \
strftime, __version__, preferred_encoding
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
from calibre.ebooks.metadata.opf import OPFCreator
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.lrf import entity_to_unicode
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
@ -152,6 +152,8 @@ class BasicNewsRecipe(object, LoggingInterface):
#: Options to pass to html2epub to customize generation of EPUB ebooks.
html2epub_options = ''
#: Options to pass to oeb2mobi to customize generation of MOBI ebooks.
oeb2mobi_options = ''
#: List of tags to be removed. Specified tags are removed from downloaded HTML.
#: A tag is specified as a dictionary of the form::
@ -876,6 +878,7 @@ class BasicNewsRecipe(object, LoggingInterface):
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = PersistentTemporaryFile('_recipe_cover.jpg')
@ -885,6 +888,9 @@ class BasicNewsRecipe(object, LoggingInterface):
opf.cover = cpath
manifest.append(cpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
entries = ['index.html']
toc = TOC(base_path=dir)