Initial implementation of epub2lrf and epub-meta (thanks llasram)

This commit is contained in:
Kovid Goyal 2008-01-07 18:04:44 +00:00
parent 2d0ae97730
commit 1546c82807
6 changed files with 226 additions and 1 deletions

View File

@ -25,10 +25,12 @@ entry_points = {
'rtf-meta = libprs500.ebooks.metadata.rtf:main',
'pdf-meta = libprs500.ebooks.metadata.pdf:main',
'lit-meta = libprs500.ebooks.metadata.lit:main',
'epub-meta = libprs500.ebooks.metadata.epub:main',
'txt2lrf = libprs500.ebooks.lrf.txt.convert_from:main',
'html2lrf = libprs500.ebooks.lrf.html.convert_from:main',
'markdown-libprs500 = libprs500.ebooks.markdown.markdown:main',
'lit2lrf = libprs500.ebooks.lrf.lit.convert_from:main',
'epub2lrf = libprs500.ebooks.lrf.epub.convert_from:main',
'rtf2lrf = libprs500.ebooks.lrf.rtf.convert_from:main',
'web2disk = libprs500.web.fetch.simple:main',
'web2lrf = libprs500.ebooks.lrf.web.convert_from:main',
@ -131,4 +133,4 @@ if __name__ == '__main__':
)
if 'develop' in ' '.join(sys.argv) and islinux:
subprocess.check_call('libprs500_postinstall', shell=True)
subprocess.check_call('libprs500_postinstall', shell=True)

View File

@ -0,0 +1,15 @@
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

View File

@ -0,0 +1,85 @@
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import os, sys, shutil, glob, logging
from tempfile import mkdtemp
from subprocess import Popen, PIPE
from libprs500.ebooks.lrf import option_parser as lrf_option_parser
from libprs500.ebooks import ConversionError
from libprs500.ebooks.lrf.html.convert_from import process_file as html_process_file
from libprs500.ebooks.metadata.opf import OPFReader, OPF
from libprs500.ebooks.metadata.epub import OCFDirReader
from libprs500.libunzip import extract as zip_extract
from libprs500 import isosx, __appname__, setup_cli_handlers, iswindows
def option_parser():
    '''
    Build the command line option parser for epub2lrf.

    The parser itself is created by C{lrf_option_parser}; this function only
    supplies the usage text specific to this tool.
    '''
    usage = (
        '''Usage: %prog [options] mybook.epub\n\n'''
        '''%prog converts mybook.epub to mybook.lrf'''
    )
    return lrf_option_parser(usage)
def generate_html(pathtoepub, logger):
    '''
    Unpack the .epub archive at C{pathtoepub} into a fresh temporary directory.

    @param pathtoepub: Path of the .epub file to unpack.
    @param logger: Logger that receives the traceback of a failed extraction
                   at debug level. May be C{None}.
    @return: Path of the directory holding the extracted contents. The caller
             is responsible for deleting it.
    @raise ConversionError: If the file cannot be read or extraction fails.
    '''
    if not os.access(pathtoepub, os.R_OK):
        raise ConversionError('Cannot read from ' + pathtoepub)
    tdir = mkdtemp(prefix=__appname__+'_')
    # mkdtemp only reserves a unique name here: the directory is removed again
    # before extraction (presumably zip_extract creates it itself -- confirm).
    os.rmdir(tdir)
    extracted = False
    try:
        try:
            zip_extract(pathtoepub, tdir)
            extracted = True
        except Exception:
            # Keep the real cause available for diagnosis instead of
            # silently discarding it.
            if logger is not None:
                logger.debug('Extraction of %s failed', pathtoepub,
                             exc_info=True)
            raise ConversionError('.epub extraction failed')
    finally:
        # Also runs for KeyboardInterrupt/SystemExit, which are deliberately
        # not converted into ConversionError, so a partial extraction is
        # never left behind.
        if not extracted and os.path.isdir(tdir):
            shutil.rmtree(tdir)
    return tdir
def process_file(path, options, logger=None):
    '''
    Convert the .epub file at C{path} by handing its first spine item to the
    HTML converter.

    The archive is unpacked into a temporary directory, which is removed
    again when conversion finishes or fails.

    @param path: Path of the .epub file.
    @param options: Parsed command line options. C{opf}, C{output} and
                    C{use_spine} are set on this object before conversion.
    @param logger: Logger to use. When C{None}, the C{epub2lrf} logger is
                   fetched and configured with C{setup_cli_handlers}.
    '''
    if logger is None:
        if options.verbose:
            level = logging.DEBUG
        else:
            level = logging.INFO
        logger = logging.getLogger('epub2lrf')
        setup_cli_handlers(logger, level)

    source = os.path.abspath(os.path.expanduser(path))
    workdir = generate_html(source, logger)
    try:
        reader = OCFDirReader(workdir)
        # Conversion starts from the first item listed in the OPF spine.
        first_item = reader.opf.spine.items().next()
        htmlfile = first_item.href
        options.opf = os.path.join(workdir, reader.container[OPF.MIMETYPE])
        if not options.output:
            # Default output: input base name with the target extension,
            # placed in the current working directory.
            if options.lrs:
                suffix = '.lrs'
            else:
                suffix = '.lrf'
            stem = os.path.basename(os.path.splitext(path)[0])
            options.output = os.path.abspath(stem + suffix)
        options.output = os.path.abspath(os.path.expanduser(options.output))
        options.use_spine = True
        html_process_file(htmlfile, options, logger=logger)
    finally:
        # Best effort cleanup: a failure to delete is reported, not raised.
        try:
            shutil.rmtree(workdir)
        except:
            logger.warning('Failed to delete temporary directory '+workdir)
def main(args=sys.argv, logger=None):
    '''
    Command line entry point for epub2lrf.

    @param args: Argument list, program name first.
    @param logger: Optional logger passed through to L{process_file}.
    @return: 0 after a conversion, 1 when exactly one input file was not given.
    '''
    parser = option_parser()
    options, args = parser.parse_args(args)
    if len(args) == 2:
        process_file(args[1], options, logger)
        return 0
    parser.print_help()
    print('')
    print('No epub file specified')
    return 1


if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,120 @@
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
'''Read meta information from epub files'''
from __future__ import with_statement
import sys, os
from zipfile import ZipFile, BadZipfile
from cStringIO import StringIO
from contextlib import closing
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
from libprs500.ebooks.metadata import MetaInformation
from libprs500.ebooks.metadata.opf import OPF, OPFReader
class EPubException(Exception):
    '''Base class for the errors raised while reading an .epub file.'''


class OCFException(EPubException):
    '''Raised for problems with the OCF container of an .epub file.'''


class ContainerException(OCFException):
    '''
    Specialisation of L{OCFException}.

    NOTE(review): nothing in this module raises it yet; presumably meant for
    container.xml errors -- confirm.
    '''
class Container(dict):
    '''
    Dictionary mapping the C{media-type} of every C{<rootfile/>} element in a
    container.xml document to its C{full-path}.
    '''

    def __init__(self, stream=None):
        '''
        @param stream: File-like object holding container.xml. When it is
                       missing the container simply stays empty.
        @raise OCFException: If there is no C{<container/>} element.
        @raise EPubException: If the version is not 1.0, C{<rootfiles/>} is
                              missing, or a C{<rootfile/>} lacks an attribute.
        '''
        if not stream:
            return
        soup = BeautifulStoneSoup(stream.read())
        container = soup.find('container')
        if not container:
            raise OCFException("<container/> element missing")
        version = container.get('version', None)
        if version != '1.0':
            raise EPubException("unsupported version of OCF")
        rootfiles = container.find('rootfiles')
        if not rootfiles:
            raise EPubException("<rootfiles/> element missing")
        for rootfile in rootfiles.findAll('rootfile'):
            try:
                media_type = rootfile['media-type']
                full_path = rootfile['full-path']
            except KeyError:
                raise EPubException("<rootfile/> element malformed")
            self[media_type] = full_path
class OCF(object):
    '''
    Abstract base holding the constants shared by the OCF readers.

    It cannot be instantiated directly.
    '''

    # Expected content of the archive's 'mimetype' member.
    MIMETYPE = 'application/epub+zip'
    # Location of the container description inside the archive.
    CONTAINER_PATH = 'META-INF/container.xml'

    def __init__(self):
        raise NotImplementedError('Abstract base class')
class OCFReader(OCF):
    '''
    Common reading logic for OCF containers.

    Subclasses must set C{self.root} and provide C{open(name)}, returning a
    file-like object for a member of the container, before calling this
    constructor. After construction C{self.container} holds the parsed
    container.xml and C{self.opf} the parsed OPF package.
    '''

    def __init__(self):
        '''
        Validate the container and load container.xml and the OPF package.

        @raise EPubException: If the mimetype member is missing or wrong, or
                              if container.xml or the OPF package is missing.
        '''
        # A missing member surfaces as KeyError or as IOError, depending on
        # how the subclass implements open(); a reader built on the builtin
        # open() raises IOError. Both mean the same thing here.
        missing = (KeyError, IOError)

        try:
            # closing() releases the stream, matching the other two reads.
            with closing(self.open('mimetype')) as f:
                mimetype = f.read().rstrip()
        except missing:
            raise EPubException("not an .epub OCF container")
        if mimetype != OCF.MIMETYPE:
            raise EPubException("not an .epub OCF container")

        try:
            with closing(self.open(OCF.CONTAINER_PATH)) as f:
                self.container = Container(f)
        except missing:
            raise EPubException("missing OCF container.xml file")

        try:
            # The KeyError case also covers a container.xml that lists no
            # rootfile with the OPF media type.
            with closing(self.open(self.container[OPF.MIMETYPE])) as f:
                self.opf = OPFReader(f, self.root)
        except missing:
            raise EPubException("missing OPF package file")
class OCFZipReader(OCFReader):
    '''Reads an OCF container stored as a ZIP archive.'''

    def __init__(self, stream):
        '''
        @param stream: File name or file-like object accepted by C{ZipFile}.
        @raise EPubException: If C{stream} is not a valid ZIP archive, or for
                              any reason raised by L{OCFReader.__init__}.
        '''
        try:
            archive = ZipFile(stream, 'r')
        except BadZipfile:
            raise EPubException("not a ZIP .epub OCF container")
        self.archive = archive
        # Use the stream's name as root when it has one, otherwise the
        # current working directory.
        self.root = getattr(stream, 'name', os.getcwd())
        super(OCFZipReader, self).__init__()

    def open(self, name, mode='r'):
        '''Return the archive member C{name} as an in-memory stream.

        C{mode} is accepted but not used.
        '''
        data = self.archive.read(name)
        return StringIO(data)
class OCFDirReader(OCFReader):
    '''Reads an OCF container that has been unpacked into a directory.'''

    def __init__(self, path):
        '''
        @param path: Directory containing the unpacked container.
        '''
        self.root = path
        super(OCFDirReader, self).__init__()

    def open(self, path, *args, **kwargs):
        '''
        Open C{path}, taken relative to the container root, with the builtin
        C{open}; extra arguments are passed through unchanged.
        '''
        full_path = os.path.join(self.root, path)
        return open(full_path, *args, **kwargs)
def get_metadata(stream):
    """
    Return the metadata of the ZIP based .epub in C{stream}.

    The result is the L{OPFReader} that L{OCFZipReader} builds from the
    archive's OPF package.
    """
    reader = OCFZipReader(stream)
    return reader.opf
def main(args=sys.argv):
    '''
    Command line entry point for epub-meta: print the metadata of one file.

    @param args: Argument list, program name first.
    @return: 0 on success, 1 when usage information was printed instead.
    '''
    if len(args) != 2 or '--help' in args or '-help' in args:
        sys.stderr.write('Usage: epub-meta FILE\n')
        return 1
    path = os.path.abspath(os.path.expanduser(args[1]))
    stream = open(path, 'rb')
    try:
        # Render the metadata while the file is still open, then always
        # release the handle, even when reading fails.
        print(unicode(get_metadata(stream)))
    finally:
        stream.close()
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -120,6 +120,7 @@ class standard_field(object):
class OPF(MetaInformation):
MIMETYPE = 'application/oebps-package+xml'
ENTITY_PATTERN = re.compile(r'&(\S+?);')
libprs_id = standard_field('libprs_id')

View File

@ -88,6 +88,7 @@ def setup_completion():
['htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip', 'php']))
f.write(opts_and_exts('txt2lrf', txtop, ['txt']))
f.write(opts_and_exts('lit2lrf', htmlop, ['lit']))
f.write(opts_and_exts('epub2lrf', htmlop, ['epub']))
f.write(opts_and_exts('rtf2lrf', htmlop, ['rtf']))
f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf']))
f.write(opts_and_exts('any2lrf', htmlop,
@ -97,6 +98,7 @@ def setup_completion():
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
f.write(opts_and_exts('pdf-meta', metaop, ['pdf']))
f.write(opts_and_exts('lit-meta', metaop, ['lit']))
f.write(opts_and_exts('epub-meta', metaop, ['epub']))
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
f.write('''
_prs500_ls()