ODT Input plugin

2025-07-09 03:04:10 -04:00 · 2009-04-19 16:32:48 -07:00 · 2009-04-19 16:32:48 -07:00 · 8be1892c4d
commit 8be1892c4d
parent 6411a81fef
3 changed files with 69 additions and 73 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -282,6 +282,7 @@ from calibre.ebooks.pdf.input import PDFInput
 from calibre.ebooks.txt.input import TXTInput
 from calibre.ebooks.lit.input import LITInput
 from calibre.ebooks.fb2.input import FB2Input
 from calibre.ebooks.odt.input import ODTInput
 from calibre.ebooks.html.input import HTMLInput
 from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.txt.output import TXTOutput
@ -290,7 +291,7 @@ from calibre.customize.profiles import input_profiles, output_profiles
 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDFInput, HTMLInput,
        TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
-        FB2Input]
+        FB2Input, ODTInput]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
--- a/src/calibre/ebooks/odt/input.py
+++ b/src/calibre/ebooks/odt/input.py
@ -0,0 +1,67 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
 Convert an ODT file into an Open Ebook
 '''
 import os
 from odf.odf2xhtml import ODF2XHTML
 from calibre import CurrentDir, walk
 from calibre.customize.conversion import InputFormatPlugin
 class Extract(ODF2XHTML):
    '''
    Explode an ODT document into a directory containing ``index.xhtml``,
    the embedded pictures and a ``metadata.opf`` describing them.
    '''
    def extract_pictures(self, zf):
        '''
        Copy the members of the open zip archive `zf` whose names start with
        ``Pictures`` into the current working directory, keeping their
        relative paths.
        Directory entries are skipped, missing sub-directories are created and
        members that would resolve outside the current directory are ignored,
        because the archive contents are untrusted.
        '''
        if not os.path.exists('Pictures'):
            os.makedirs('Pictures')
        base = os.path.abspath(os.getcwd())
        # Trailing separator so that a sibling such as "<base>-evil" cannot match
        prefix = os.path.join(base, '')
        for name in zf.namelist():
            if not name.startswith('Pictures'):
                continue
            # Zip member names always use "/" regardless of platform
            dest = os.path.abspath(os.path.join(base, *name.split('/')))
            if not dest.startswith(prefix):
                # e.g. Pictures/../../x: refuse to write outside the output dir
                continue
            if name.endswith('/') or os.path.isdir(dest):
                # Directory entry, there is no file data to write
                continue
            parent = os.path.dirname(dest)
            if not os.path.isdir(parent):
                os.makedirs(parent)
            data = zf.read(name)
            with open(dest, 'wb') as f:
                f.write(data)
    def __call__(self, stream, odir):
        '''
        Convert the ODT document in `stream` and write the result to `odir`.
        :param stream: File-like object with the ODT data; it is rewound with
                       ``seek(0)`` before the metadata is read
        :param odir: Output directory, created if it does not exist
        :return: Absolute path of the generated ``metadata.opf``
        '''
        from calibre.utils.zipfile import ZipFile
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator
        if not os.path.exists(odir):
            os.makedirs(odir)
        with CurrentDir(odir):
            # Parenthesised form prints the same text for a single string
            print('Extracting ODT file...')
            html = self.odf2xhtml(stream)
            with open('index.xhtml', 'wb') as f:
                f.write(html.encode('utf-8'))
            zf = ZipFile(stream, 'r')
            self.extract_pictures(zf)
            stream.seek(0)
            mi = get_metadata(stream, 'odt')
            if not mi.title:
                mi.title = _('Unknown')
            if not mi.authors:
                mi.authors = [_('Unknown')]
            opf = OPFCreator(os.path.abspath(os.getcwdu()), mi)
            # The manifest is built before metadata.opf exists, so it lists
            # only the files written above
            opf.create_manifest([(os.path.abspath(f), None) for f in walk(os.getcwd())])
            opf.create_spine([os.path.abspath('index.xhtml')])
            with open('metadata.opf', 'wb') as f:
                opf.render(f)
            return os.path.abspath('metadata.opf')
 class ODTInput(InputFormatPlugin):
    '''Input plugin that passes ODT documents to Extract.'''
    name        = 'ODT Input'
    author      = 'Kovid Goyal'
    file_types  = set(['odt'])
    description = 'Convert ODT (OpenOffice) files to HTML'
    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        Run Extract on `stream` with the current directory as output
        directory and return its result. The remaining arguments are
        not used here.
        '''
        extractor = Extract()
        return extractor(stream, '.')
--- a/src/calibre/ebooks/odt/to_oeb.py
+++ b/src/calibre/ebooks/odt/to_oeb.py
@ -1,72 +0,0 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
 Convert an ODT file into an Open Ebook
 '''
 import os, sys
 from odf.odf2xhtml import ODF2XHTML
 from calibre import CurrentDir, walk
 from calibre.utils.zipfile import ZipFile
 from calibre.utils.config import OptionParser
 from calibre.ebooks.metadata.odt import get_metadata
 from calibre.ebooks.metadata.opf2 import OPFCreator
 class Extract(ODF2XHTML):
    '''
    Explode an ODT file into a directory containing ``index.html``, the
    embedded pictures and a ``metadata.opf`` describing them.
    '''
    def extract_pictures(self, zf):
        '''
        Copy the members of the open zip archive `zf` whose names start with
        ``Pictures`` into the current working directory, keeping their
        relative paths.
        Directory entries are skipped, missing sub-directories are created and
        members that would resolve outside the current directory are ignored,
        because the archive contents are untrusted.
        '''
        if not os.path.exists('Pictures'):
            os.makedirs('Pictures')
        base = os.path.abspath(os.getcwd())
        # Trailing separator so that a sibling such as "<base>-evil" cannot match
        prefix = os.path.join(base, '')
        for name in zf.namelist():
            if not name.startswith('Pictures'):
                continue
            # Zip member names always use "/" regardless of platform
            dest = os.path.abspath(os.path.join(base, *name.split('/')))
            if not dest.startswith(prefix):
                # e.g. Pictures/../../x: refuse to write outside the output dir
                continue
            if name.endswith('/') or os.path.isdir(dest):
                # Directory entry, there is no file data to write
                continue
            parent = os.path.dirname(dest)
            if not os.path.isdir(parent):
                os.makedirs(parent)
            data = zf.read(name)
            with open(dest, 'wb') as f:
                f.write(data)
    def __call__(self, path, odir):
        '''
        Convert the ODT file at `path` and write the result to `odir`.
        :param path: Path to the ODT file; made absolute before changing directory
        :param odir: Output directory, created if it does not exist
        :return: Absolute path of the generated ``metadata.opf``
        '''
        if not os.path.exists(odir):
            os.makedirs(odir)
        path = os.path.abspath(path)
        with CurrentDir(odir):
            # Parenthesised form prints the same text for a single string
            print('Extracting ODT file...')
            html = self.odf2xhtml(path)
            with open('index.html', 'wb') as f:
                f.write(html.encode('utf-8'))
            with open(path, 'rb') as f:
                zf = ZipFile(f, 'r')
                self.extract_pictures(zf)
                f.seek(0)
                mi = get_metadata(f)
                if not mi.title:
                    # splitext returns (root, ext); only the root is the title
                    mi.title = os.path.splitext(os.path.basename(path))[0]
                if not mi.authors:
                    mi.authors = [_('Unknown')]
            opf = OPFCreator(os.path.abspath(os.getcwdu()), mi)
            # The manifest is built before metadata.opf exists, so it lists
            # only the files written above
            opf.create_manifest([(os.path.abspath(f), None) for f in walk(os.getcwd())])
            opf.create_spine([os.path.abspath('index.html')])
            with open('metadata.opf', 'wb') as f:
                opf.render(f)
            return os.path.abspath('metadata.opf')
 def option_parser():
    '''Build the command line parser, with its single -o/--output-dir option.'''
    usage = '%prog [options] file.odt'
    parser = OptionParser(usage)
    output_dir_help = _('The output directory. Defaults to the current directory.')
    parser.add_option('-o', '--output-dir', default='.', help=output_dir_help)
    return parser
 def main(args=sys.argv):
    '''
    Command line entry point: extract the ODT file named by the first
    positional argument into the chosen output directory.
    Returns 0 on success and 1 when no ODT file was given.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) >= 2:
        odt_path = args[1]
        Extract()(odt_path, os.path.abspath(opts.output_dir))
        print('Extracted to %s' % os.path.abspath(opts.output_dir))
        return 0
    # args[0] is the program name, so fewer than two entries means no input file
    parser.print_help()
    print('No ODT file specified')
    return 1
 if __name__ == '__main__':
    # Run the command line entry point and use its return value as exit status
    raise SystemExit(main())