mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
IGN:Fix more minor regressions
This commit is contained in:
parent
f7bf112ae2
commit
829a344fe9
@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
|
||||
'''
|
||||
Conversion to EPUB.
|
||||
'''
|
||||
import sys
|
||||
import sys, textwrap
|
||||
from calibre.utils.config import Config, StringConfig
|
||||
from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED
|
||||
from calibre.ebooks.html import config as common_config
|
||||
@ -53,9 +53,21 @@ The expression used must evaluate to a list of elements. To disable chapter dete
|
||||
use the expression "/". See the XPath Tutorial in the calibre User Manual for further
|
||||
help on using this feature.
|
||||
''').replace('\n', ' '))
|
||||
structure('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
|
||||
help=_('Don\'t add detected chapters to the Table of Contents'))
|
||||
structure('no_links_in_toc', ['--no-links-in-toc'], default=False,
|
||||
help=_('Don\'t add links in the root HTML file to the Table of Contents'))
|
||||
|
||||
toc = c.add_group('toc',
|
||||
_('''\
|
||||
Control the automatic generation of a Table of Contents. If an OPF file is detected
|
||||
and it specifies a Table of Contents, then that will be used rather than trying
|
||||
to auto-generate a Table of Contents.
|
||||
''').replace('\n', ' '))
|
||||
toc('max_toc_recursion', ['--max-toc-recursion'], default=1,
|
||||
help=_('Number of levels of HTML files to try to autodetect TOC entries from. Set to 0 to disable all TOC autodetection. Default is %default.'))
|
||||
toc('max_toc_links', ['--max-toc-links'], default=40,
|
||||
help=_('Maximum number of links from each HTML file to insert into the TOC. Set to 0 to disable. Default is: %default.'))
|
||||
toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
|
||||
help=_("Don't add auto-detected chapters to the Table of Contents."))
|
||||
toc('add_files_to_toc', ['--add-files-to-toc'], default=False,
|
||||
help=_('If more than one HTML file is found, create a TOC entry for each file.'))
|
||||
|
||||
|
||||
return c
|
@ -1,13 +1,16 @@
|
||||
from __future__ import with_statement
|
||||
from calibre.ebooks.metadata.opf import OPFReader
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
import os, sys, re, shutil
|
||||
import os, sys, re, shutil, cStringIO
|
||||
from lxml.etree import XPath
|
||||
|
||||
from calibre.ebooks.html import Parser, get_text, merge_metadata, get_filelist
|
||||
from calibre.ebooks.html import Parser, get_text, merge_metadata, get_filelist,\
|
||||
opf_traverse, create_metadata, rebase_toc
|
||||
from calibre.ebooks.epub import config as common_config
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
|
||||
|
||||
class HTMLProcessor(Parser):
|
||||
@ -17,7 +20,7 @@ class HTMLProcessor(Parser):
|
||||
name='html2epub')
|
||||
if opts.verbose > 2:
|
||||
self.debug_tree('parsed')
|
||||
self.detected_chapters = self.opts.chapter(self.root)
|
||||
self.detect_chapters()
|
||||
self.extract_css()
|
||||
|
||||
if opts.verbose > 2:
|
||||
@ -27,6 +30,13 @@ class HTMLProcessor(Parser):
|
||||
|
||||
self.split()
|
||||
|
||||
def detect_chapters(self):
    '''
    Locate chapter elements and force a page break before each of them.

    Evaluates the ``self.opts.chapter`` callable against ``self.root``, stores
    the result in ``self.detected_chapters`` and appends
    ``page-break-before: always`` to the inline ``style`` attribute of every
    matched element.
    '''
    self.detected_chapters = self.opts.chapter(self.root)
    for elem in self.detected_chapters:
        style = elem.get('style', '')
        style += ';page-break-before: always'
        # The attribute name must be the literal 'style'. Passing the
        # variable here created an attribute named after the CSS text and
        # left the real style attribute untouched.
        elem.set('style', style)
|
||||
|
||||
def collect_font_statistics(self):
|
||||
'''
|
||||
Collect font statistics to figure out the base font size used in this
|
||||
@ -46,37 +56,44 @@ class HTMLProcessor(Parser):
|
||||
|
||||
|
||||
def config(defaults=None):
|
||||
c = common_config(defaults=defaults)
|
||||
return c
|
||||
return common_config(defaults=defaults)
|
||||
|
||||
def option_parser():
|
||||
c = config()
|
||||
return c.option_parser(usage=_('''\
|
||||
%prog [options] file.html
|
||||
%prog [options] file.html|opf
|
||||
|
||||
Convert a HTML file to an EPUB ebook. Follows links in the HTML file.
|
||||
Convert a HTML file to an EPUB ebook. Recursively follows links in the HTML file.
|
||||
If you specify an OPF file instead of an HTML file, the list of links is taken from
|
||||
the <spine> element of the OPF file.
|
||||
'''))
|
||||
|
||||
def parse_content(filelist, opts):
|
||||
tdir = PersistentTemporaryDirectory('_html2epub')
|
||||
def parse_content(filelist, opts, tdir):
|
||||
os.makedirs(os.path.join(tdir, 'content', 'resources'))
|
||||
resource_map = {}
|
||||
for htmlfile in filelist:
|
||||
hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
|
||||
resource_map, filelist)
|
||||
hp.save()
|
||||
return resource_map, hp.htmlfile_map
|
||||
|
||||
def convert(htmlfile, opts, notification=None):
|
||||
htmlfile = os.path.abspath(htmlfile)
|
||||
if opts.output is None:
|
||||
opts.output = os.path.splitext(os.path.basename(htmlfile))[0] + '.epub'
|
||||
opts.output = os.path.abspath(opts.output)
|
||||
if htmlfile.lower().endswith('.opf'):
|
||||
opf = OPFReader(htmlfile, os.path.dirname(os.path.abspath(htmlfile)))
|
||||
filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
|
||||
mi = MetaInformation(opf)
|
||||
else:
|
||||
opf, filelist = get_filelist(htmlfile, opts)
|
||||
mi = merge_metadata(htmlfile, opf, opts)
|
||||
opts.chapter = XPath(opts.chapter,
|
||||
namespaces={'re':'http://exslt.org/regular-expressions'})
|
||||
|
||||
resource_map = parse_content(filelist, opts)
|
||||
|
||||
with TemporaryDirectory('_html2epub') as tdir:
|
||||
resource_map, htmlfile_map = parse_content(filelist, opts, tdir)
|
||||
resources = [os.path.join(opts.output, 'content', f) for f in resource_map.values()]
|
||||
|
||||
if opf.cover and os.access(opf.cover, os.R_OK):
|
||||
@ -86,6 +103,18 @@ def convert(htmlfile, opts, notification=None):
|
||||
resources.append(cpath)
|
||||
mi.cover = cpath
|
||||
|
||||
spine = [htmlfile_map[f.path] for f in filelist]
|
||||
mi = create_metadata(tdir, mi, spine, resources)
|
||||
buf = cStringIO.StringIO()
|
||||
if mi.toc:
|
||||
rebase_toc(mi.toc, htmlfile_map, opts.output)
|
||||
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as f:
|
||||
mi.render(f, buf)
|
||||
toc = buf.getvalue()
|
||||
if toc:
|
||||
with open(os.path.join(tdir, 'toc.ncx'), 'wb') as f:
|
||||
f.write(toc)
|
||||
|
||||
def main(args=sys.argv):
|
||||
parser = option_parser()
|
||||
opts, args = parser.parse_args(args)
|
||||
|
@ -1,10 +1,14 @@
|
||||
from __future__ import with_statement
|
||||
import cStringIO
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys, re, os, shutil, logging, tempfile
|
||||
'''
|
||||
Code to recursively parse HTML files and create an open ebook in a specified
|
||||
directory or zip file. All the action starts in :function:`create_dir`.
|
||||
'''
|
||||
|
||||
import sys, re, os, shutil, logging, tempfile, cStringIO
|
||||
from urlparse import urlparse
|
||||
from urllib import unquote
|
||||
|
||||
@ -445,10 +449,10 @@ class Parser(PreProcessor, LoggingInterface):
|
||||
self.raw_css = '\n\n'.join(css)
|
||||
# TODO: Figure out what to do about CSS imports from linked stylesheets
|
||||
|
||||
def config(defaults=None):
|
||||
desc = _('Options to control the traversal of HTML')
|
||||
def config(defaults=None, config_name='html',
|
||||
desc=_('Options to control the traversal of HTML')):
|
||||
if defaults is None:
|
||||
c = Config('html', desc)
|
||||
c = Config(config_name, desc)
|
||||
else:
|
||||
c = StringConfig(defaults, desc)
|
||||
|
||||
@ -482,10 +486,12 @@ def config(defaults=None):
|
||||
def option_parser():
|
||||
c = config()
|
||||
return c.option_parser(usage=_('''\
|
||||
%prog [options] file.html
|
||||
%prog [options] file.html|opf
|
||||
|
||||
Follow all links in an HTML file and collect them into the specified directory.
|
||||
Also collects any referenced resources like images, stylesheets, scripts, etc.
|
||||
If an OPF file is specified instead, the list of files in its <spine> element
|
||||
is used.
|
||||
'''))
|
||||
|
||||
def search_for_opf(dir):
|
||||
@ -566,7 +572,8 @@ def create_metadata(basepath, mi, filelist, resources):
|
||||
|
||||
def rebase_toc(toc, htmlfile_map, basepath, root=True):
|
||||
'''
|
||||
Rebase a :class:`calibre.ebooks.metadata.toc.TOC` object.
|
||||
Rebase a :class:`calibre.ebooks.metadata.toc.TOC` object. Maps all entries
|
||||
in the TOC to point to their new locations relative to the new OPF file.
|
||||
'''
|
||||
def fix_entry(entry):
|
||||
if entry.abspath in htmlfile_map.keys():
|
||||
@ -582,15 +589,23 @@ def create_dir(htmlfile, opts):
|
||||
'''
|
||||
Create a directory that contains the open ebook
|
||||
'''
|
||||
if htmlfile.lower().endswith('.opf'):
|
||||
opf = OPFReader(open(htmlfile, 'rb'), os.path.dirname(os.path.abspath(htmlfile)))
|
||||
filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
|
||||
mi = MetaInformation(opf)
|
||||
else:
|
||||
opf, filelist = get_filelist(htmlfile, opts)
|
||||
mi = merge_metadata(htmlfile, opf, opts)
|
||||
|
||||
resource_map, htmlfile_map = parse_content(filelist, opts)
|
||||
resources = [os.path.join(opts.output, 'content', f) for f in resource_map.values()]
|
||||
|
||||
if opf and opf.cover and os.access(opf.cover, os.R_OK):
|
||||
cpath = os.path.join(opts.output, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover)[-1])
|
||||
shutil.copyfile(opf.cover, cpath)
|
||||
resources.append(cpath)
|
||||
mi.cover = cpath
|
||||
|
||||
spine = [htmlfile_map[f.path] for f in filelist]
|
||||
mi = create_metadata(opts.output, mi, spine, resources)
|
||||
buf = cStringIO.StringIO()
|
||||
|
@ -105,7 +105,6 @@ def set_metadata(stream, mi):
|
||||
reader.opf.smart_update(mi)
|
||||
newopf = StringIO(reader.opf.render())
|
||||
safe_replace(stream, reader.container[OPF.MIMETYPE], newopf)
|
||||
print newopf.getvalue()
|
||||
|
||||
def option_parser():
|
||||
parser = get_parser('epub')
|
||||
|
@ -150,7 +150,7 @@ class OPF(object):
|
||||
def fset(self, val):
|
||||
matches = self.isbn_path(self.tree)
|
||||
if not matches:
|
||||
matches = [self.create_metadata_element('dc:identifier',
|
||||
matches = [self.create_metadata_element('identifier', ns='dc',
|
||||
attrib={'{%s}scheme'%self.NAMESPACES['opf']:'ISBN'})]
|
||||
matches[0].text = unicode(val)
|
||||
return property(fget=fget, fset=fset)
|
||||
|
Loading…
x
Reference in New Issue
Block a user