mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Initial implementation of epub2lrf and epub-meta (thanks llasram)
This commit is contained in:
parent
2d0ae97730
commit
1546c82807
4
setup.py
4
setup.py
@ -25,10 +25,12 @@ entry_points = {
|
||||
'rtf-meta = libprs500.ebooks.metadata.rtf:main',
|
||||
'pdf-meta = libprs500.ebooks.metadata.pdf:main',
|
||||
'lit-meta = libprs500.ebooks.metadata.lit:main',
|
||||
'epub-meta = libprs500.ebooks.metadata.epub:main',
|
||||
'txt2lrf = libprs500.ebooks.lrf.txt.convert_from:main',
|
||||
'html2lrf = libprs500.ebooks.lrf.html.convert_from:main',
|
||||
'markdown-libprs500 = libprs500.ebooks.markdown.markdown:main',
|
||||
'lit2lrf = libprs500.ebooks.lrf.lit.convert_from:main',
|
||||
'epub2lrf = libprs500.ebooks.lrf.epub.convert_from:main',
|
||||
'rtf2lrf = libprs500.ebooks.lrf.rtf.convert_from:main',
|
||||
'web2disk = libprs500.web.fetch.simple:main',
|
||||
'web2lrf = libprs500.ebooks.lrf.web.convert_from:main',
|
||||
@ -131,4 +133,4 @@ if __name__ == '__main__':
|
||||
)
|
||||
|
||||
if 'develop' in ' '.join(sys.argv) and islinux:
|
||||
subprocess.check_call('libprs500_postinstall', shell=True)
|
||||
subprocess.check_call('libprs500_postinstall', shell=True)
|
||||
|
15
src/libprs500/ebooks/lrf/epub/__init__.py
Normal file
15
src/libprs500/ebooks/lrf/epub/__init__.py
Normal file
@ -0,0 +1,15 @@
|
||||
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
|
||||
## This program is free software; you can redistribute it and/or modify
|
||||
## it under the terms of the GNU General Public License as published by
|
||||
## the Free Software Foundation; either version 2 of the License, or
|
||||
## (at your option) any later version.
|
||||
##
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
## GNU General Public License for more details.
|
||||
##
|
||||
## You should have received a copy of the GNU General Public License along
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
85
src/libprs500/ebooks/lrf/epub/convert_from.py
Normal file
85
src/libprs500/ebooks/lrf/epub/convert_from.py
Normal file
@ -0,0 +1,85 @@
|
||||
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
|
||||
## This program is free software; you can redistribute it and/or modify
|
||||
## it under the terms of the GNU General Public License as published by
|
||||
## the Free Software Foundation; either version 2 of the License, or
|
||||
## (at your option) any later version.
|
||||
##
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
## GNU General Public License for more details.
|
||||
##
|
||||
## You should have received a copy of the GNU General Public License along
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
import os, sys, shutil, glob, logging
|
||||
from tempfile import mkdtemp
|
||||
from subprocess import Popen, PIPE
|
||||
from libprs500.ebooks.lrf import option_parser as lrf_option_parser
|
||||
from libprs500.ebooks import ConversionError
|
||||
from libprs500.ebooks.lrf.html.convert_from import process_file as html_process_file
|
||||
from libprs500.ebooks.metadata.opf import OPFReader, OPF
|
||||
from libprs500.ebooks.metadata.epub import OCFDirReader
|
||||
from libprs500.libunzip import extract as zip_extract
|
||||
from libprs500 import isosx, __appname__, setup_cli_handlers, iswindows
|
||||
|
||||
|
||||
def option_parser():
    """Build the command line option parser for epub2lrf.

    The shared LRF option parser is reused; only the usage text is
    specific to this tool.
    """
    usage = (
        '''Usage: %prog [options] mybook.epub\n\n'''
        '''%prog converts mybook.epub to mybook.lrf'''
    )
    return lrf_option_parser(usage)
|
||||
|
||||
def generate_html(pathtoepub, logger):
    """Unpack an .epub archive into a temporary directory.

    @param pathtoepub: Path of the .epub file to extract.
    @param logger: Logger that receives the underlying cause (at debug
                   level) when extraction fails. May be C{None}.
    @return: Path of the directory holding the extracted files. The caller
             is responsible for deleting it.
    @raise ConversionError: If the file cannot be read or extraction fails.
    """
    if not os.access(pathtoepub, os.R_OK):
        raise ConversionError('Cannot read from ' + pathtoepub)
    tdir = mkdtemp(prefix=__appname__+'_')
    # NOTE(review): the freshly made directory is removed again before
    # extraction, presumably because zip_extract wants to create its target
    # itself -- confirm against libprs500.libunzip.
    os.rmdir(tdir)
    extracted = False
    try:
        zip_extract(pathtoepub, tdir)
        extracted = True
    except Exception:
        # Only real errors become ConversionError; KeyboardInterrupt and
        # SystemExit propagate untouched (after cleanup in ``finally``).
        if logger is not None:
            logger.debug('.epub extraction failed', exc_info=True)
        raise ConversionError('.epub extraction failed')
    finally:
        # Never leave a half-extracted tree behind, and never let a cleanup
        # failure hide the error that is already propagating.
        if not extracted and os.path.isdir(tdir):
            shutil.rmtree(tdir, ignore_errors=True)
    return tdir
|
||||
|
||||
def process_file(path, options, logger=None):
    """Convert the .epub file at C{path} to LRF (or LRS).

    The archive is unpacked into a temporary directory, the first item of
    the OPF spine is handed to the HTML converter, and the temporary
    directory is removed afterwards.

    @param path: Path of the .epub file; C{~} is expanded.
    @param options: Parsed options object. C{verbose}, C{lrs} and C{output}
                    are read; C{opf}, C{output} and C{use_spine} are set.
    @param logger: Logger to use. When C{None} an 'epub2lrf' logger with
                   CLI handlers is created.
    @raise ConversionError: If extraction fails or the spine is empty.
    """
    if logger is None:
        level = logging.DEBUG if options.verbose else logging.INFO
        logger = logging.getLogger('epub2lrf')
        setup_cli_handlers(logger, level)
    epub = os.path.abspath(os.path.expanduser(path))
    tdir = generate_html(epub, logger)
    try:
        ocf = OCFDirReader(tdir)
        spine_items = ocf.opf.spine.items()
        try:
            first_item = spine_items.next()
        except StopIteration:
            # An empty spine would otherwise surface as a bare StopIteration.
            raise ConversionError('No spine items found in ' + epub)
        htmlfile = first_item.href
        options.opf = os.path.join(tdir, ocf.container[OPF.MIMETYPE])
        if not options.output:
            # Default output: input base name, in the current directory.
            ext = '.lrs' if options.lrs else '.lrf'
            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
        options.output = os.path.abspath(os.path.expanduser(options.output))
        options.use_spine = True

        html_process_file(htmlfile, options, logger=logger)
    finally:
        # Best-effort cleanup: a failure here is only worth a warning, but
        # KeyboardInterrupt/SystemExit must not be swallowed.
        try:
            shutil.rmtree(tdir)
        except Exception:
            logger.warning('Failed to delete temporary directory '+tdir)
|
||||
|
||||
|
||||
def main(args=sys.argv, logger=None):
|
||||
parser = option_parser()
|
||||
options, args = parser.parse_args(args)
|
||||
if len(args) != 2:
|
||||
parser.print_help()
|
||||
print
|
||||
print 'No epub file specified'
|
||||
return 1
|
||||
process_file(args[1], options, logger)
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
120
src/libprs500/ebooks/metadata/epub.py
Normal file
120
src/libprs500/ebooks/metadata/epub.py
Normal file
@ -0,0 +1,120 @@
|
||||
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
|
||||
## This program is free software; you can redistribute it and/or modify
|
||||
## it under the terms of the GNU General Public License as published by
|
||||
## the Free Software Foundation; either version 2 of the License, or
|
||||
## (at your option) any later version.
|
||||
##
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
## GNU General Public License for more details.
|
||||
##
|
||||
## You should have received a copy of the GNU General Public License along
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
'''Read meta information from EPUB files'''
|
||||
|
||||
from __future__ import with_statement
|
||||
|
||||
import sys, os
|
||||
|
||||
from zipfile import ZipFile, BadZipfile
|
||||
from cStringIO import StringIO
|
||||
from contextlib import closing
|
||||
|
||||
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
|
||||
from libprs500.ebooks.metadata import MetaInformation
|
||||
from libprs500.ebooks.metadata.opf import OPF, OPFReader
|
||||
|
||||
|
||||
class EPubException(Exception):
    """Base class for the errors raised while reading an .epub file."""
|
||||
|
||||
class OCFException(EPubException):
    """.epub error concerning the OCF container, e.g. a container.xml
    without a <container/> element."""
|
||||
|
||||
class ContainerException(OCFException):
    """Specialised OCF error.

    NOTE(review): not raised by any code visible alongside this class;
    presumably intended for container.xml problems -- confirm.
    """
|
||||
|
||||
class Container(dict):
    """Mapping of media-type to full-path taken from an OCF container.xml.

    One entry is stored per <rootfile/> element found below
    <container>/<rootfiles>.
    """

    def __init__(self, stream=None):
        """Parse container.xml from C{stream}.

        @param stream: File-like object with a C{read()} method. A false
                       value leaves the mapping empty.
        @raise OCFException: If there is no <container/> element.
        @raise EPubException: If the version is not '1.0', <rootfiles/> is
                              missing, or a <rootfile/> lacks an attribute.
        """
        if not stream:
            return
        document = BeautifulStoneSoup(stream.read())
        root = document.find('container')
        if not root:
            raise OCFException("<container/> element missing")
        version = root.get('version', None)
        if version != '1.0':
            raise EPubException("unsupported version of OCF")
        listing = root.find('rootfiles')
        if not listing:
            raise EPubException("<rootfiles/> element missing")
        for entry in listing.findAll('rootfile'):
            try:
                location = entry['full-path']
                media_type = entry['media-type']
            except KeyError:
                raise EPubException("<rootfile/> element malformed")
            self[media_type] = location
|
||||
|
||||
class OCF(object):
    """Abstract base carrying the constants shared by the OCF readers.

    Instantiating this class directly raises NotImplementedError.
    """

    # Value the container's 'mimetype' entry is compared against.
    MIMETYPE = 'application/epub+zip'
    # Name under which the container description is opened.
    CONTAINER_PATH = 'META-INF/container.xml'

    def __init__(self):
        raise NotImplementedError('Abstract base class')
|
||||
|
||||
class OCFReader(OCF):
    """Validate an OCF container and load its container.xml and OPF package.

    Subclasses must set C{self.root} and provide C{open(name)} returning a
    file-like object before calling this constructor. On success
    C{self.container} holds the parsed L{Container} and C{self.opf} the
    L{OPFReader} for the package file.
    """

    def __init__(self):
        """Read 'mimetype', container.xml and the OPF package in turn.

        A missing entry shows up as KeyError from a zip archive and as
        IOError from a plain directory; both are reported as EPubException.

        @raise EPubException: If the mimetype is wrong or a required file
                              is missing.
        """
        try:
            # Close the handle deterministically, like the reads below.
            with closing(self.open('mimetype')) as f:
                mimetype = f.read().rstrip()
            if mimetype != OCF.MIMETYPE:
                raise EPubException
        except (KeyError, IOError, EPubException):
            raise EPubException("not an .epub OCF container")

        try:
            with closing(self.open(OCF.CONTAINER_PATH)) as f:
                self.container = Container(f)
        except (KeyError, IOError):
            raise EPubException("missing OCF container.xml file")

        try:
            # KeyError also covers a container without an OPF rootfile entry.
            with closing(self.open(self.container[OPF.MIMETYPE])) as f:
                self.opf = OPFReader(f, self.root)
        except (KeyError, IOError):
            raise EPubException("missing OPF package file")
|
||||
|
||||
class OCFZipReader(OCFReader):
    """OCF reader backed by a zipped .epub file."""

    def __init__(self, stream):
        """Open C{stream} as a zip archive and load the container from it.

        @param stream: Whatever C{ZipFile} accepts for reading.
        @raise EPubException: If C{stream} is not a valid zip archive.
        """
        try:
            archive = ZipFile(stream, 'r')
        except BadZipfile:
            raise EPubException("not a ZIP .epub OCF container")
        self.archive = archive
        # Use the stream's name as root, falling back to the working directory.
        fallback_root = os.getcwd()
        self.root = getattr(stream, 'name', fallback_root)
        super(OCFZipReader, self).__init__()

    def open(self, name, mode='r'):
        """Return the archive member C{name} as an in-memory file object.

        C{mode} is accepted but not used.
        """
        payload = self.archive.read(name)
        return StringIO(payload)
|
||||
|
||||
class OCFDirReader(OCFReader):
    """OCF reader backed by a directory containing the container's files."""

    def __init__(self, path):
        """Load the container rooted at the directory C{path}."""
        self.root = path
        super(OCFDirReader, self).__init__()

    def open(self, path, *args, **kwargs):
        """Open C{path} relative to the root directory.

        Extra arguments are forwarded to the builtin C{open}.
        """
        target = os.path.join(self.root, path)
        return open(target, *args, **kwargs)
|
||||
|
||||
|
||||
def get_metadata(stream):
    """Return the metadata of the zipped .epub read from C{stream}.

    The result is the L{OPFReader} that L{OCFZipReader} builds from the
    package file named in the archive's container.xml.
    """
    reader = OCFZipReader(stream)
    return reader.opf
|
||||
|
||||
def main(args=sys.argv):
|
||||
if len(args) != 2 or '--help' in args or '-help' in args:
|
||||
print >>sys.stderr, 'Usage: epub-meta FILE'
|
||||
return 1
|
||||
|
||||
path = os.path.abspath(os.path.expanduser(args[1]))
|
||||
print unicode(get_metadata(open(path, 'rb')))
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
@ -120,6 +120,7 @@ class standard_field(object):
|
||||
|
||||
class OPF(MetaInformation):
|
||||
|
||||
MIMETYPE = 'application/oebps-package+xml'
|
||||
ENTITY_PATTERN = re.compile(r'&(\S+?);')
|
||||
|
||||
libprs_id = standard_field('libprs_id')
|
||||
|
@ -88,6 +88,7 @@ def setup_completion():
|
||||
['htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip', 'php']))
|
||||
f.write(opts_and_exts('txt2lrf', txtop, ['txt']))
|
||||
f.write(opts_and_exts('lit2lrf', htmlop, ['lit']))
|
||||
f.write(opts_and_exts('epub2lrf', htmlop, ['epub']))
|
||||
f.write(opts_and_exts('rtf2lrf', htmlop, ['rtf']))
|
||||
f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf']))
|
||||
f.write(opts_and_exts('any2lrf', htmlop,
|
||||
@ -97,6 +98,7 @@ def setup_completion():
|
||||
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
|
||||
f.write(opts_and_exts('pdf-meta', metaop, ['pdf']))
|
||||
f.write(opts_and_exts('lit-meta', metaop, ['lit']))
|
||||
f.write(opts_and_exts('epub-meta', metaop, ['epub']))
|
||||
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
|
||||
f.write('''
|
||||
_prs500_ls()
|
||||
|
Loading…
x
Reference in New Issue
Block a user