diff --git a/setup.py b/setup.py
index f95b43de32..008f1c301f 100644
--- a/setup.py
+++ b/setup.py
@@ -25,10 +25,12 @@ entry_points = {
'rtf-meta = libprs500.ebooks.metadata.rtf:main',
'pdf-meta = libprs500.ebooks.metadata.pdf:main',
'lit-meta = libprs500.ebooks.metadata.lit:main',
+ 'epub-meta = libprs500.ebooks.metadata.epub:main',
'txt2lrf = libprs500.ebooks.lrf.txt.convert_from:main',
'html2lrf = libprs500.ebooks.lrf.html.convert_from:main',
'markdown-libprs500 = libprs500.ebooks.markdown.markdown:main',
'lit2lrf = libprs500.ebooks.lrf.lit.convert_from:main',
+ 'epub2lrf = libprs500.ebooks.lrf.epub.convert_from:main',
'rtf2lrf = libprs500.ebooks.lrf.rtf.convert_from:main',
'web2disk = libprs500.web.fetch.simple:main',
'web2lrf = libprs500.ebooks.lrf.web.convert_from:main',
@@ -131,4 +133,4 @@ if __name__ == '__main__':
)
if 'develop' in ' '.join(sys.argv) and islinux:
- subprocess.check_call('libprs500_postinstall', shell=True)
\ No newline at end of file
+ subprocess.check_call('libprs500_postinstall', shell=True)
diff --git a/src/libprs500/ebooks/lrf/epub/__init__.py b/src/libprs500/ebooks/lrf/epub/__init__.py
new file mode 100644
index 0000000000..97ad144cc4
--- /dev/null
+++ b/src/libprs500/ebooks/lrf/epub/__init__.py
@@ -0,0 +1,15 @@
+## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License along
+## with this program; if not, write to the Free Software Foundation, Inc.,
+## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
diff --git a/src/libprs500/ebooks/lrf/epub/convert_from.py b/src/libprs500/ebooks/lrf/epub/convert_from.py
new file mode 100644
index 0000000000..708e0d2a40
--- /dev/null
+++ b/src/libprs500/ebooks/lrf/epub/convert_from.py
@@ -0,0 +1,85 @@
+## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License along
+## with this program; if not, write to the Free Software Foundation, Inc.,
+## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import os, sys, shutil, glob, logging
+from tempfile import mkdtemp
+from subprocess import Popen, PIPE
+from libprs500.ebooks.lrf import option_parser as lrf_option_parser
+from libprs500.ebooks import ConversionError
+from libprs500.ebooks.lrf.html.convert_from import process_file as html_process_file
+from libprs500.ebooks.metadata.opf import OPFReader, OPF
+from libprs500.ebooks.metadata.epub import OCFDirReader
+from libprs500.libunzip import extract as zip_extract
+from libprs500 import isosx, __appname__, setup_cli_handlers, iswindows
+
+
+ def option_parser():
+     '''Build the command line option parser for epub2lrf.
+
+     The generic LRF option parser is reused; only the usage text is
+     specific to this tool.
+     '''
+     usage = (
+         '''Usage: %prog [options] mybook.epub\n\n'''
+         '''%prog converts mybook.epub to mybook.lrf'''
+     )
+     return lrf_option_parser(usage)
+
+ def generate_html(pathtoepub, logger):
+     '''Unpack the .epub archive into a fresh temporary directory.
+
+     Despite its name, this function does not generate any HTML itself; it
+     only extracts the archive so the files inside it can be processed.
+
+     @param pathtoepub: Path to the .epub file to extract.
+     @param logger: Not used here; kept so the call signature stays stable.
+     @return: Path of the temporary directory holding the extracted files.
+              The caller is responsible for deleting it.
+     @raise ConversionError: If the file is not readable or extraction fails.
+     '''
+     if not os.access(pathtoepub, os.R_OK):
+         raise ConversionError('Cannot read from ' + pathtoepub)
+     tdir = mkdtemp(prefix=__appname__+'_')
+     # mkdtemp is used only to obtain a unique name: the directory is removed
+     # again before extraction. NOTE(review): presumably zip_extract needs to
+     # create the target directory itself -- confirm before changing this.
+     os.rmdir(tdir)
+     try:
+         zip_extract(pathtoepub, tdir)
+     except Exception:
+         # Only real errors are translated; KeyboardInterrupt and SystemExit
+         # propagate instead of being reported as a failed extraction.
+         if os.path.isdir(tdir):
+             shutil.rmtree(tdir)
+         raise ConversionError('.epub extraction failed')
+     return tdir
+
+ def process_file(path, options, logger=None):
+     '''Convert the .epub file at path to LRF (or LRS) via the HTML converter.
+
+     The archive is extracted to a temporary directory, the href of the first
+     item of the OPF spine is handed to the HTML converter, and the temporary
+     directory is removed again afterwards.
+
+     @param path: Path to the .epub file.
+     @param options: Parsed command line options. This object is modified:
+                     opf, output and use_spine are set on it.
+     @param logger: Logger to report to. If None, an 'epub2lrf' logger is
+                    set up for command line use.
+     @raise ConversionError: If the file cannot be read or extracted, or if
+                             the OPF spine has no items.
+     '''
+     if logger is None:
+         level = logging.DEBUG if options.verbose else logging.INFO
+         logger = logging.getLogger('epub2lrf')
+         setup_cli_handlers(logger, level)
+     epub = os.path.abspath(os.path.expanduser(path))
+     tdir = generate_html(epub, logger)
+     try:
+         ocf = OCFDirReader(tdir)
+         try:
+             htmlfile = ocf.opf.spine.items().next().href
+         except StopIteration:
+             # An empty spine would otherwise leak a bare StopIteration
+             raise ConversionError('No content found in ' + epub)
+         options.opf = os.path.join(tdir, ocf.container[OPF.MIMETYPE])
+         if not options.output:
+             # Default output: input base name with the new extension, placed
+             # in the current working directory
+             ext = '.lrs' if options.lrs else '.lrf'
+             options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
+         options.output = os.path.abspath(os.path.expanduser(options.output))
+         options.use_spine = True
+
+         html_process_file(htmlfile, options, logger=logger)
+     finally:
+         # Best effort cleanup: a failure to delete is only logged
+         try:
+             shutil.rmtree(tdir)
+         except Exception:
+             logger.warning('Failed to delete temporary directory '+tdir)
+
+
+ def main(args=sys.argv, logger=None):
+     '''Command line entry point for epub2lrf.
+
+     @param args: Argument list, program name first. After option parsing
+                  exactly two entries must remain: the program name and
+                  the path to the .epub file.
+     @param logger: Optional logger, passed through to process_file.
+     @return: 0 after a conversion, 1 if the arguments are not usable.
+     '''
+     parser = option_parser()
+     options, args = parser.parse_args(args)
+     if len(args) == 2:
+         process_file(args[1], options, logger)
+         return 0
+     parser.print_help()
+     print
+     print 'No epub file specified'
+     return 1
+
+
+ if __name__ == '__main__':
+     sys.exit(main())
diff --git a/src/libprs500/ebooks/metadata/epub.py b/src/libprs500/ebooks/metadata/epub.py
new file mode 100644
index 0000000000..4727ae5ca1
--- /dev/null
+++ b/src/libprs500/ebooks/metadata/epub.py
@@ -0,0 +1,120 @@
+## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License along
+## with this program; if not, write to the Free Software Foundation, Inc.,
+## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ '''Read meta information from EPUB files'''
+
+from __future__ import with_statement
+
+import sys, os
+
+from zipfile import ZipFile, BadZipfile
+from cStringIO import StringIO
+from contextlib import closing
+
+from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
+from libprs500.ebooks.metadata import MetaInformation
+from libprs500.ebooks.metadata.opf import OPF, OPFReader
+
+
+ class EPubException(Exception):
+     '''Base class of the exceptions defined in this module.'''
+
+ class OCFException(EPubException):
+     '''Problem with the OCF container of an .epub file.'''
+
+ class ContainerException(OCFException):
+     '''Specialisation of OCFException.
+
+     NOTE(review): presumably meant for container.xml problems -- confirm
+     against the code that raises it.
+     '''
+
+ class Container(dict):
+     '''Mapping of media type to full path for the rootfiles of a container.
+
+     The mapping is filled from the contents of a container.xml document.
+     '''
+
+     def __init__(self, stream=None):
+         '''Parse container.xml from stream.
+
+         @param stream: File-like object with the container.xml data. If it
+                        is None (or otherwise false) the mapping stays empty.
+         @raise OCFException: If there is no <container/> element.
+         @raise EPubException: If the OCF version is not '1.0', the
+                               <rootfiles/> element is missing, or a
+                               <rootfile/> lacks its media-type or full-path
+                               attribute.
+         '''
+         if not stream: return
+         soup = BeautifulStoneSoup(stream.read())
+         container = soup.find('container')
+         if not container:
+             raise OCFException("<container/> element missing")
+         if container.get('version', None) != '1.0':
+             raise EPubException("unsupported version of OCF")
+         rootfiles = container.find('rootfiles')
+         if not rootfiles:
+             raise EPubException("<rootfiles/> element missing")
+         for rootfile in rootfiles.findAll('rootfile'):
+             try:
+                 self[rootfile['media-type']] = rootfile['full-path']
+             except KeyError:
+                 # One of the two attributes is absent on this rootfile
+                 raise EPubException("<rootfile/> element malformed")
+
+ class OCF(object):
+     '''Abstract base holding the constants shared by the OCF readers.'''
+
+     # Location of the container description inside the .epub
+     CONTAINER_PATH = 'META-INF/container.xml'
+     # Media type identifying an .epub OCF container
+     MIMETYPE = 'application/epub+zip'
+
+     def __init__(self):
+         '''Always fails: this class is not meant to be instantiated.'''
+         message = 'Abstract base class'
+         raise NotImplementedError(message)
+
+ class OCFReader(OCF):
+     '''Common logic for reading an OCF container.
+
+     Subclasses must set self.root and provide an open(name) method before
+     calling this constructor. On success self.container and self.opf are
+     set.
+     '''
+
+     def __init__(self):
+         '''Validate the container and load container.xml and the OPF file.
+
+         @raise EPubException: If the mimetype file is missing or has the
+                               wrong content, or if container.xml or the
+                               OPF package file is missing.
+         '''
+         # A missing member shows up as KeyError when reading from a zip
+         # archive and as IOError when reading from a directory, so both
+         # are translated below.
+         try:
+             # closing() releases the stream as soon as it has been read
+             with closing(self.open('mimetype')) as f:
+                 mimetype = f.read().rstrip()
+             if mimetype != OCF.MIMETYPE:
+                 raise EPubException
+         except (KeyError, IOError, EPubException):
+             raise EPubException("not an .epub OCF container")
+
+         try:
+             with closing(self.open(OCF.CONTAINER_PATH)) as f:
+                 self.container = Container(f)
+         except (KeyError, IOError):
+             raise EPubException("missing OCF container.xml file")
+
+         try:
+             # The KeyError may also come from the container lookup when no
+             # rootfile with the OPF media type is listed
+             with closing(self.open(self.container[OPF.MIMETYPE])) as f:
+                 self.opf = OPFReader(f, self.root)
+         except (KeyError, IOError):
+             raise EPubException("missing OPF package file")
+
+ class OCFZipReader(OCFReader):
+     '''OCF reader for an .epub stored as a ZIP archive.'''
+
+     def __init__(self, stream):
+         '''Open stream as a ZIP archive and read the container from it.
+
+         @param stream: File name or file-like object handed to ZipFile.
+         @raise EPubException: If stream is not a valid ZIP file, or if the
+                               base class rejects the container.
+         '''
+         try:
+             archive = ZipFile(stream, 'r')
+         except BadZipfile:
+             raise EPubException("not a ZIP .epub OCF container")
+         self.archive = archive
+         # Root is the name of the stream if it has one, else the current
+         # working directory
+         fallback = os.getcwd()
+         self.root = getattr(stream, 'name', fallback)
+         super(OCFZipReader, self).__init__()
+
+     def open(self, name, mode='r'):
+         '''Return the archive member called name as an in-memory stream.
+
+         The mode argument is accepted but not used.
+         '''
+         data = self.archive.read(name)
+         return StringIO(data)
+
+ class OCFDirReader(OCFReader):
+     '''OCF reader for an .epub that has been extracted into a directory.'''
+
+     def __init__(self, path):
+         '''Read the container found in the directory path.'''
+         self.root = path
+         super(OCFDirReader, self).__init__()
+
+     def open(self, path, *args, **kwargs):
+         '''Open path relative to the root directory.
+
+         Any further arguments are passed on to the builtin open().
+         '''
+         target = os.path.join(self.root, path)
+         return open(target, *args, **kwargs)
+
+
+ def get_metadata(stream):
+     """ Return the OPF metadata of the .epub ZIP archive in stream.
+
+     The result is the opf attribute of an L{OCFZipReader} built on stream.
+     """
+     reader = OCFZipReader(stream)
+     return reader.opf
+
+ def main(args=sys.argv):
+     '''Command line entry point for epub-meta: print an .epub's metadata.
+
+     @param args: Argument list, program name first, followed by exactly
+                  one path to an .epub file.
+     @return: 0 after printing the metadata, 1 if usage help was shown.
+     '''
+     help_requested = '--help' in args or '-help' in args or '-h' in args
+     if len(args) != 2 or help_requested:
+         print >>sys.stderr, 'Usage: epub-meta FILE'
+         return 1
+
+     path = os.path.abspath(os.path.expanduser(args[1]))
+     # The with block closes the file even if reading the metadata fails
+     with open(path, 'rb') as stream:
+         print unicode(get_metadata(stream))
+     return 0
+
+ if __name__ == '__main__':
+     sys.exit(main())
diff --git a/src/libprs500/ebooks/metadata/opf.py b/src/libprs500/ebooks/metadata/opf.py
index f87c6cdda0..7f0eee751a 100644
--- a/src/libprs500/ebooks/metadata/opf.py
+++ b/src/libprs500/ebooks/metadata/opf.py
@@ -120,6 +120,7 @@ class standard_field(object):
class OPF(MetaInformation):
+ MIMETYPE = 'application/oebps-package+xml'
ENTITY_PATTERN = re.compile(r'&(\S+?);')
libprs_id = standard_field('libprs_id')
diff --git a/src/libprs500/linux.py b/src/libprs500/linux.py
index 95336bd07c..af8790a9e8 100644
--- a/src/libprs500/linux.py
+++ b/src/libprs500/linux.py
@@ -88,6 +88,7 @@ def setup_completion():
['htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip', 'php']))
f.write(opts_and_exts('txt2lrf', txtop, ['txt']))
f.write(opts_and_exts('lit2lrf', htmlop, ['lit']))
+ f.write(opts_and_exts('epub2lrf', htmlop, ['epub']))
f.write(opts_and_exts('rtf2lrf', htmlop, ['rtf']))
f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf']))
f.write(opts_and_exts('any2lrf', htmlop,
@@ -97,6 +98,7 @@ def setup_completion():
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
f.write(opts_and_exts('pdf-meta', metaop, ['pdf']))
f.write(opts_and_exts('lit-meta', metaop, ['lit']))
+ f.write(opts_and_exts('epub-meta', metaop, ['epub']))
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
f.write('''
_prs500_ls()