mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement #976 (recursive import of HTML into library as OEBPS zip)
This commit is contained in:
parent
41a938aef0
commit
22859c604e
@ -1,4 +1,5 @@
|
||||
from __future__ import with_statement
|
||||
import cStringIO
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
@ -17,7 +18,7 @@ from calibre.utils.config import Config, StringConfig
|
||||
from calibre.ebooks.metadata.opf import OPFReader, OPFCreator
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
|
||||
@ -319,7 +320,7 @@ class Parser(PreProcessor, LoggingInterface):
|
||||
with open(os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path]), 'wb') as f:
|
||||
f.write(html.tostring(self.root,
|
||||
encoding='utf-8', method='xml',
|
||||
include_meta_content_type=True,
|
||||
include_meta_content_type=True,
|
||||
pretty_print=self.opts.pretty_print)
|
||||
)
|
||||
return f.name
|
||||
@ -491,9 +492,6 @@ Follow all links in an HTML file and collect them into the specified directory.
|
||||
Also collects any references resources like images, stylesheets, scripts, etc.
|
||||
'''))
|
||||
|
||||
def safe_option_parser():
|
||||
return option_parser(safe=True)
|
||||
|
||||
def search_for_opf(dir):
|
||||
for f in os.listdir(dir):
|
||||
if f.lower().endswith('.opf'):
|
||||
@ -501,9 +499,16 @@ def search_for_opf(dir):
|
||||
|
||||
|
||||
def get_filelist(htmlfile, opts):
|
||||
'''
|
||||
Build list of files references by html file or try to detect and use an
|
||||
OPF file instead.
|
||||
'''
|
||||
print 'Building file list...'
|
||||
|
||||
opf = search_for_opf(os.path.dirname(htmlfile))
|
||||
dir = os.path.dirname(htmlfile)
|
||||
if not dir:
|
||||
dir = os.getcwd()
|
||||
opf = search_for_opf(dir)
|
||||
filelist = None
|
||||
if opf is not None:
|
||||
filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
|
||||
if not filelist:
|
||||
@ -516,6 +521,9 @@ def get_filelist(htmlfile, opts):
|
||||
return opf, filelist
|
||||
|
||||
def parse_content(filelist, opts):
|
||||
'''
|
||||
Parse content, rewriting links and copying resources.
|
||||
'''
|
||||
if not opts.output:
|
||||
opts.output = '.'
|
||||
opts.output = os.path.abspath(opts.output)
|
||||
@ -530,6 +538,9 @@ def parse_content(filelist, opts):
|
||||
return resource_map, p.htmlfile_map
|
||||
|
||||
def merge_metadata(htmlfile, opf, opts):
|
||||
'''
|
||||
Merge metadata from various sources.
|
||||
'''
|
||||
if opf:
|
||||
mi = MetaInformation(opf)
|
||||
else:
|
||||
@ -548,29 +559,59 @@ def merge_metadata(htmlfile, opf, opts):
|
||||
return mi
|
||||
|
||||
def create_metadata(basepath, mi, filelist, resources):
|
||||
'''
|
||||
Create an OPF metadata object with correct spine and manifest.
|
||||
'''
|
||||
mi = OPFCreator(basepath, mi)
|
||||
entries = [('content/'+f, None) for f in filelist] + [(f, None) for f in resources]
|
||||
mi.create_manifest(entries)
|
||||
mi.create_spine(['content/'+f for f in filelist])
|
||||
return mi
|
||||
|
||||
def rebase_toc(toc, htmlfile_map, basepath, root=True):
|
||||
'''
|
||||
Rebase a :class:`calibre.ebooks.metadata.toc.TOC` object.
|
||||
'''
|
||||
def fix_entry(entry):
|
||||
if entry.abspath in htmlfile_map.keys():
|
||||
entry.href = 'content/' + htmlfile_map[entry.abspath]
|
||||
|
||||
for entry in toc:
|
||||
rebase_toc(entry, htmlfile_map, basepath, root=False)
|
||||
fix_entry(entry)
|
||||
if root:
|
||||
toc.base_path = basepath
|
||||
|
||||
def create_dir(htmlfile, opts):
|
||||
'''
|
||||
Create a directory that contains the open ebook
|
||||
'''
|
||||
opf, filelist = get_filelist(htmlfile, opts)
|
||||
mi = merge_metadata(htmlfile, opf, opts)
|
||||
resource_map, htmlfile_map = parse_content(filelist, opts)
|
||||
resources = [os.path.join(opts.output, 'content', f) for f in resource_map.values()]
|
||||
if opf.cover and os.access(opf.cover, os.R_OK):
|
||||
if opf and opf.cover and os.access(opf.cover, os.R_OK):
|
||||
cpath = os.path.join(opts.output, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover)[-1])
|
||||
shutil.copyfile(opf.cover, cpath)
|
||||
resources.append(cpath)
|
||||
mi.cover = cpath
|
||||
spine = [htmlfile_map[f.path] for f in filelist]
|
||||
mi = create_metadata(opts.output, mi, spine, resources)
|
||||
buf = cStringIO.StringIO()
|
||||
if mi.toc:
|
||||
rebase_toc(mi.toc, htmlfile_map, opts.output)
|
||||
with open(os.path.join(opts.output, 'metadata.opf'), 'wb') as f:
|
||||
mi.render(f)
|
||||
mi.render(f, buf)
|
||||
toc = buf.getvalue()
|
||||
if toc:
|
||||
with open(os.path.join(opts.output, 'toc.ncx'), 'wb') as f:
|
||||
f.write(toc)
|
||||
print 'Open ebook created in', opts.output
|
||||
|
||||
def create_oebzip(htmlfile, opts):
|
||||
'''
|
||||
Create a zip file that contains the Open ebook.
|
||||
'''
|
||||
tdir = PersistentTemporaryDirectory('_create_oebzip')
|
||||
if opts.output is None:
|
||||
opts.output = os.path.join(os.path.splitext(htmlfile)[0]+'.oeb.zip')
|
||||
@ -597,5 +638,26 @@ def main(args=sys.argv):
|
||||
|
||||
return 0
|
||||
|
||||
def gui_main(htmlfile):
|
||||
'''
|
||||
Convenience wrapper for use in recursively importing HTML files.
|
||||
'''
|
||||
pt = PersistentTemporaryFile('_html2oeb_gui.oeb.zip')
|
||||
pt.close()
|
||||
opts = '''
|
||||
pretty_print = True
|
||||
max_levels = 5
|
||||
output = %s
|
||||
'''%repr(pt.name)
|
||||
c = config(defaults=opts)
|
||||
opts = c.parse()
|
||||
create_oebzip(htmlfile, opts)
|
||||
zf = ZipFile(pt.name, 'r')
|
||||
nontrivial = [f for f in zf.infolist() if f.compress_size > 1 and not f.filename.endswith('.opf')]
|
||||
if len(nontrivial) < 2:
|
||||
return None
|
||||
return pt.name
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
@ -56,6 +56,7 @@ class TOC(list):
|
||||
if not os.path.isabs(path):
|
||||
path = os.path.join(self.base_path, path)
|
||||
return path
|
||||
|
||||
return property(fget=fget, doc=doc)
|
||||
|
||||
def read_from_opf(self, opfreader):
|
||||
|
@ -1,6 +1,6 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import os, sys, textwrap, collections, traceback, time
|
||||
import os, sys, textwrap, collections, traceback, time, re
|
||||
from xml.parsers.expat import ExpatError
|
||||
from functools import partial
|
||||
from PyQt4.QtCore import Qt, SIGNAL, QObject, QCoreApplication, QUrl
|
||||
@ -43,6 +43,7 @@ from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
|
||||
from calibre.gui2.dialogs.book_info import BookInfo
|
||||
from calibre.ebooks.metadata.meta import set_metadata
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.ebooks.html import gui_main as html2oeb
|
||||
from calibre.ebooks import BOOK_EXTENSIONS
|
||||
from calibre.ebooks.lrf import preferred_source_formats as LRF_PREFERRED_SOURCE_FORMATS
|
||||
from calibre.library.database2 import LibraryDatabase2, CoverCache
|
||||
@ -480,6 +481,15 @@ class Main(MainWindow, Ui_MainWindow):
|
||||
|
||||
if not to_device:
|
||||
model = self.current_view().model()
|
||||
html_pat = re.compile(r'\.x{0,1}htm(l{0,1})\s*$', re.IGNORECASE)
|
||||
paths = list(paths)
|
||||
for i, path in enumerate(paths):
|
||||
if html_pat.search(path) is not None:
|
||||
paths[i] = html2oeb(path)
|
||||
if paths[i] is None:
|
||||
paths[i] = path
|
||||
else:
|
||||
formats[i] = 'zip'
|
||||
duplicates = model.add_books(paths, formats, metadata)
|
||||
if duplicates:
|
||||
files = _('<p>Books with the same title as the following already exist in the database. Add them anyway?<ul>')
|
||||
|
Loading…
x
Reference in New Issue
Block a user