mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Initial implementation of epub2lrf and epub-meta (thanks llasram)
This commit is contained in:
parent
2d0ae97730
commit
1546c82807
4
setup.py
4
setup.py
@ -25,10 +25,12 @@ entry_points = {
|
|||||||
'rtf-meta = libprs500.ebooks.metadata.rtf:main',
|
'rtf-meta = libprs500.ebooks.metadata.rtf:main',
|
||||||
'pdf-meta = libprs500.ebooks.metadata.pdf:main',
|
'pdf-meta = libprs500.ebooks.metadata.pdf:main',
|
||||||
'lit-meta = libprs500.ebooks.metadata.lit:main',
|
'lit-meta = libprs500.ebooks.metadata.lit:main',
|
||||||
|
'epub-meta = libprs500.ebooks.metadata.epub:main',
|
||||||
'txt2lrf = libprs500.ebooks.lrf.txt.convert_from:main',
|
'txt2lrf = libprs500.ebooks.lrf.txt.convert_from:main',
|
||||||
'html2lrf = libprs500.ebooks.lrf.html.convert_from:main',
|
'html2lrf = libprs500.ebooks.lrf.html.convert_from:main',
|
||||||
'markdown-libprs500 = libprs500.ebooks.markdown.markdown:main',
|
'markdown-libprs500 = libprs500.ebooks.markdown.markdown:main',
|
||||||
'lit2lrf = libprs500.ebooks.lrf.lit.convert_from:main',
|
'lit2lrf = libprs500.ebooks.lrf.lit.convert_from:main',
|
||||||
|
'epub2lrf = libprs500.ebooks.lrf.epub.convert_from:main',
|
||||||
'rtf2lrf = libprs500.ebooks.lrf.rtf.convert_from:main',
|
'rtf2lrf = libprs500.ebooks.lrf.rtf.convert_from:main',
|
||||||
'web2disk = libprs500.web.fetch.simple:main',
|
'web2disk = libprs500.web.fetch.simple:main',
|
||||||
'web2lrf = libprs500.ebooks.lrf.web.convert_from:main',
|
'web2lrf = libprs500.ebooks.lrf.web.convert_from:main',
|
||||||
@ -131,4 +133,4 @@ if __name__ == '__main__':
|
|||||||
)
|
)
|
||||||
|
|
||||||
if 'develop' in ' '.join(sys.argv) and islinux:
|
if 'develop' in ' '.join(sys.argv) and islinux:
|
||||||
subprocess.check_call('libprs500_postinstall', shell=True)
|
subprocess.check_call('libprs500_postinstall', shell=True)
|
||||||
|
15
src/libprs500/ebooks/lrf/epub/__init__.py
Normal file
15
src/libprs500/ebooks/lrf/epub/__init__.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
|
||||||
|
## This program is free software; you can redistribute it and/or modify
|
||||||
|
## it under the terms of the GNU General Public License as published by
|
||||||
|
## the Free Software Foundation; either version 2 of the License, or
|
||||||
|
## (at your option) any later version.
|
||||||
|
##
|
||||||
|
## This program is distributed in the hope that it will be useful,
|
||||||
|
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
## GNU General Public License for more details.
|
||||||
|
##
|
||||||
|
## You should have received a copy of the GNU General Public License along
|
||||||
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
85
src/libprs500/ebooks/lrf/epub/convert_from.py
Normal file
85
src/libprs500/ebooks/lrf/epub/convert_from.py
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
|
||||||
|
## This program is free software; you can redistribute it and/or modify
|
||||||
|
## it under the terms of the GNU General Public License as published by
|
||||||
|
## the Free Software Foundation; either version 2 of the License, or
|
||||||
|
## (at your option) any later version.
|
||||||
|
##
|
||||||
|
## This program is distributed in the hope that it will be useful,
|
||||||
|
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
## GNU General Public License for more details.
|
||||||
|
##
|
||||||
|
## You should have received a copy of the GNU General Public License along
|
||||||
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
import os, sys, shutil, glob, logging
|
||||||
|
from tempfile import mkdtemp
|
||||||
|
from subprocess import Popen, PIPE
|
||||||
|
from libprs500.ebooks.lrf import option_parser as lrf_option_parser
|
||||||
|
from libprs500.ebooks import ConversionError
|
||||||
|
from libprs500.ebooks.lrf.html.convert_from import process_file as html_process_file
|
||||||
|
from libprs500.ebooks.metadata.opf import OPFReader, OPF
|
||||||
|
from libprs500.ebooks.metadata.epub import OCFDirReader
|
||||||
|
from libprs500.libunzip import extract as zip_extract
|
||||||
|
from libprs500 import isosx, __appname__, setup_cli_handlers, iswindows
|
||||||
|
|
||||||
|
|
||||||
|
def option_parser():
    '''
    Build the command line option parser for epub2lrf.

    The usage text is handed to the shared LRF option parser factory and
    whatever that factory returns is passed straight back to the caller.
    '''
    usage = (
        '''Usage: %prog [options] mybook.epub\n\n'''
        '''%prog converts mybook.epub to mybook.lrf'''
    )
    return lrf_option_parser(usage)
|
||||||
|
|
||||||
|
def generate_html(pathtoepub, logger):
    '''
    Unpack the epub at C{pathtoepub} into a freshly named temporary directory.

    @param pathtoepub: Path to the .epub file to extract.
    @param logger: Currently unused by this function.
    @return: Path of the directory holding the extracted contents. Nothing
             here deletes it on success; that is left to the caller.
    @raise ConversionError: If the file is not readable or extraction fails.
    '''
    if not os.access(pathtoepub, os.R_OK):
        raise ConversionError('Cannot read from ' + pathtoepub)
    tdir = mkdtemp(prefix=__appname__+'_')
    # NOTE(review): the directory is removed again immediately, presumably
    # because zip_extract wants to create its target itself -- confirm.
    os.rmdir(tdir)
    try:
        zip_extract(pathtoepub, tdir)
    except Exception:
        # ignore_errors keeps a failing cleanup from masking the
        # ConversionError we actually want to report.
        shutil.rmtree(tdir, ignore_errors=True)
        raise ConversionError('.epub extraction failed')
    except:
        # KeyboardInterrupt, SystemExit and the like: still clean up, but
        # let them propagate instead of disguising them as ConversionError.
        shutil.rmtree(tdir, ignore_errors=True)
        raise
    return tdir
|
||||||
|
|
||||||
|
def process_file(path, options, logger=None):
    '''
    Convert the epub at C{path} by extracting it and handing the first
    spine item to the HTML converter.

    @param path: Path to the .epub file.
    @param options: Parsed options object. This function reads C{verbose},
                    C{lrs} and C{output} from it and sets C{opf}, C{output}
                    and C{use_spine} on it before delegating.
    @param logger: Logger to use. When None, a logger named 'epub2lrf' is
                   created and set up via setup_cli_handlers.
    '''
    if logger is None:
        if options.verbose:
            level = logging.DEBUG
        else:
            level = logging.INFO
        logger = logging.getLogger('epub2lrf')
        setup_cli_handlers(logger, level)

    epub = os.path.abspath(os.path.expanduser(path))
    tdir = generate_html(epub, logger)
    try:
        ocf = OCFDirReader(tdir)
        first_item = ocf.opf.spine.items().next()
        htmlfile = first_item.href
        options.opf = os.path.join(tdir, ocf.container[OPF.MIMETYPE])

        if not options.output:
            # Default output: input base name with the matching extension
            if options.lrs:
                ext = '.lrs'
            else:
                ext = '.lrf'
            stem = os.path.splitext(path)[0]
            options.output = os.path.abspath(os.path.basename(stem) + ext)
        options.output = os.path.abspath(os.path.expanduser(options.output))
        options.use_spine = True

        html_process_file(htmlfile, options, logger=logger)
    finally:
        # The extraction directory is removed whether or not conversion
        # succeeded; a failure to remove it is only logged.
        try:
            shutil.rmtree(tdir)
        except:
            logger.warning('Failed to delete temporary directory '+tdir)
|
||||||
|
|
||||||
|
|
||||||
|
def main(args=sys.argv, logger=None):
    '''
    Command line entry point for epub2lrf.

    @param args: Argument list, program name first.
    @param logger: Optional logger passed through to process_file.
    @return: 0 after a conversion, 1 when the parsed arguments do not
             consist of the program name plus exactly one file.
    '''
    parser = option_parser()
    options, args = parser.parse_args(args)
    if len(args) == 2:
        process_file(args[1], options, logger)
        return 0

    # Wrong number of positional arguments: show help and a hint
    parser.print_help()
    print('')
    print('No epub file specified')
    return 1
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Run the converter and use its return value as the exit status.
    raise SystemExit(main())
|
120
src/libprs500/ebooks/metadata/epub.py
Normal file
120
src/libprs500/ebooks/metadata/epub.py
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
|
||||||
|
## This program is free software; you can redistribute it and/or modify
|
||||||
|
## it under the terms of the GNU General Public License as published by
|
||||||
|
## the Free Software Foundation; either version 2 of the License, or
|
||||||
|
## (at your option) any later version.
|
||||||
|
##
|
||||||
|
## This program is distributed in the hope that it will be useful,
|
||||||
|
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
## GNU General Public License for more details.
|
||||||
|
##
|
||||||
|
## You should have received a copy of the GNU General Public License along
|
||||||
|
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
'''Read meta information from EPUB files'''
|
||||||
|
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
import sys, os
|
||||||
|
|
||||||
|
from zipfile import ZipFile, BadZipfile
|
||||||
|
from cStringIO import StringIO
|
||||||
|
from contextlib import closing
|
||||||
|
|
||||||
|
from libprs500.ebooks.BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
|
||||||
|
from libprs500.ebooks.metadata import MetaInformation
|
||||||
|
from libprs500.ebooks.metadata.opf import OPF, OPFReader
|
||||||
|
|
||||||
|
|
||||||
|
class EPubException(Exception):
    '''Root of the exception hierarchy defined in this module.'''
|
||||||
|
|
||||||
|
class OCFException(EPubException):
    '''An EPubException subtype for errors concerning the OCF container.'''
|
||||||
|
|
||||||
|
class ContainerException(OCFException):
    '''An OCFException subtype; nothing in this module raises it yet.'''
|
||||||
|
|
||||||
|
class Container(dict):
    '''
    Dictionary mapping the C{media-type} of every <rootfile/> element in a
    container document to that element's C{full-path}.
    '''

    def __init__(self, stream=None):
        '''
        @param stream: Object with a C{read()} method yielding the container
                       XML. When omitted (or otherwise false) the mapping is
                       simply left empty.
        @raise OCFException: If there is no <container/> element.
        @raise EPubException: If the version is not '1.0', <rootfiles/> is
                              missing, or a <rootfile/> lacks an attribute.
        '''
        if not stream:
            return

        soup = BeautifulStoneSoup(stream.read())
        container = soup.find('container')
        if not container:
            raise OCFException("<container/> element missing")

        version = container.get('version', None)
        if version != '1.0':
            raise EPubException("unsupported version of OCF")

        rootfiles = container.find('rootfiles')
        if not rootfiles:
            raise EPubException("<rootfiles/> element missing")

        for entry in rootfiles.findAll('rootfile'):
            try:
                media_type = entry['media-type']
                full_path = entry['full-path']
            except KeyError:
                raise EPubException("<rootfile/> element malformed")
            self[media_type] = full_path
|
||||||
|
|
||||||
|
class OCF(object):
    '''
    Abstract base carrying the constants shared by the OCF readers.
    Instantiating it directly always fails.
    '''

    # Expected content of the container's 'mimetype' entry
    MIMETYPE = 'application/epub+zip'
    # Location of the container description inside the OCF
    CONTAINER_PATH = 'META-INF/container.xml'

    def __init__(self):
        raise NotImplementedError('Abstract base class')
|
||||||
|
|
||||||
|
class OCFReader(OCF):
    '''
    Shared loading logic for OCF containers.

    This class calls C{self.open(name)} and reads C{self.root} without
    defining either, so a subclass has to provide both before invoking this
    constructor. After construction C{self.container} holds the parsed
    container document and C{self.opf} the OPF reader.
    '''

    def __init__(self):
        '''
        Validate the mimetype entry, then load container.xml and the OPF
        package file it points to.

        @raise EPubException: If the mimetype is wrong, or if opening any of
                              the three entries raises KeyError.
        '''
        try:
            # Close the handle explicitly; the original left it open.
            with closing(self.open('mimetype')) as f:
                mimetype = f.read().rstrip()
            if mimetype != OCF.MIMETYPE:
                raise EPubException
        except (KeyError, EPubException):
            # NOTE(review): a directory-backed open() presumably raises
            # IOError for a missing file, which is not translated here.
            raise EPubException("not an .epub OCF container")

        try:
            with closing(self.open(OCF.CONTAINER_PATH)) as f:
                self.container = Container(f)
        except KeyError:
            raise EPubException("missing OCF container.xml file")

        try:
            # A container without an OPF rootfile entry also lands here,
            # since the lookup by OPF.MIMETYPE raises KeyError.
            with closing(self.open(self.container[OPF.MIMETYPE])) as f:
                self.opf = OPFReader(f, self.root)
        except KeyError:
            raise EPubException("missing OPF package file")
|
||||||
|
|
||||||
|
class OCFZipReader(OCFReader):
    '''OCF reader backed by a ZIP archive.'''

    def __init__(self, stream):
        '''
        @param stream: Whatever ZipFile accepts as its first argument. Its
                       C{name} attribute, if present, becomes C{self.root};
                       otherwise the current working directory is used.
        @raise EPubException: If the stream is not a valid ZIP file.
        '''
        try:
            archive = ZipFile(stream, 'r')
        except BadZipfile:
            raise EPubException("not a ZIP .epub OCF container")
        self.archive = archive
        fallback_root = os.getcwd()
        self.root = getattr(stream, 'name', fallback_root)
        super(OCFZipReader, self).__init__()

    def open(self, name, mode='r'):
        '''
        Return an in-memory file object holding the archive member C{name}.
        The C{mode} argument is accepted but not used.
        '''
        data = self.archive.read(name)
        return StringIO(data)
|
||||||
|
|
||||||
|
class OCFDirReader(OCFReader):
    '''OCF reader backed by a directory on disk.'''

    def __init__(self, path):
        '''
        @param path: Directory that all names passed to open() are joined to.
        '''
        self.root = path
        super(OCFDirReader, self).__init__()

    def open(self, path, *args, **kwargs):
        '''
        Open C{path} relative to C{self.root}; any further arguments are
        forwarded unchanged to the builtin open().
        '''
        full_path = os.path.join(self.root, path)
        return open(full_path, *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def get_metadata(stream):
    """
    Return the C{opf} attribute of an L{OCFZipReader} built from C{stream}.

    @param stream: The epub, passed as-is to L{OCFZipReader}.
    """
    reader = OCFZipReader(stream)
    return reader.opf
|
||||||
|
|
||||||
|
def main(args=sys.argv):
    '''
    Command line entry point for epub-meta: print the metadata of one file.

    @param args: Argument list, program name first.
    @return: 1 after printing the usage line to stderr (wrong number of
             arguments, or '--help' / '-help' given), otherwise 0.
    '''
    if len(args) != 2 or '--help' in args or '-help' in args:
        sys.stderr.write('Usage: epub-meta FILE\n')
        return 1

    path = os.path.abspath(os.path.expanduser(args[1]))
    # Close the input file deterministically; the text is built while the
    # stream is still open so nothing can touch a closed file afterwards.
    with open(path, 'rb') as stream:
        text = unicode(get_metadata(stream))
    print(text)
    return 0
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Run the tool and use its return value as the exit status.
    raise SystemExit(main())
|
@ -120,6 +120,7 @@ class standard_field(object):
|
|||||||
|
|
||||||
class OPF(MetaInformation):
|
class OPF(MetaInformation):
|
||||||
|
|
||||||
|
MIMETYPE = 'application/oebps-package+xml'
|
||||||
ENTITY_PATTERN = re.compile(r'&(\S+?);')
|
ENTITY_PATTERN = re.compile(r'&(\S+?);')
|
||||||
|
|
||||||
libprs_id = standard_field('libprs_id')
|
libprs_id = standard_field('libprs_id')
|
||||||
|
@ -88,6 +88,7 @@ def setup_completion():
|
|||||||
['htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip', 'php']))
|
['htm', 'html', 'xhtml', 'xhtm', 'rar', 'zip', 'php']))
|
||||||
f.write(opts_and_exts('txt2lrf', txtop, ['txt']))
|
f.write(opts_and_exts('txt2lrf', txtop, ['txt']))
|
||||||
f.write(opts_and_exts('lit2lrf', htmlop, ['lit']))
|
f.write(opts_and_exts('lit2lrf', htmlop, ['lit']))
|
||||||
|
f.write(opts_and_exts('epub2lrf', htmlop, ['epub']))
|
||||||
f.write(opts_and_exts('rtf2lrf', htmlop, ['rtf']))
|
f.write(opts_and_exts('rtf2lrf', htmlop, ['rtf']))
|
||||||
f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf']))
|
f.write(opts_and_exts('pdf2lrf', htmlop, ['pdf']))
|
||||||
f.write(opts_and_exts('any2lrf', htmlop,
|
f.write(opts_and_exts('any2lrf', htmlop,
|
||||||
@ -97,6 +98,7 @@ def setup_completion():
|
|||||||
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
|
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
|
||||||
f.write(opts_and_exts('pdf-meta', metaop, ['pdf']))
|
f.write(opts_and_exts('pdf-meta', metaop, ['pdf']))
|
||||||
f.write(opts_and_exts('lit-meta', metaop, ['lit']))
|
f.write(opts_and_exts('lit-meta', metaop, ['lit']))
|
||||||
|
f.write(opts_and_exts('epub-meta', metaop, ['epub']))
|
||||||
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
|
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
|
||||||
f.write('''
|
f.write('''
|
||||||
_prs500_ls()
|
_prs500_ls()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user