Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
IGN:Fix more minor regressions
commit 829a344fe9 (parent f7bf112ae2)
@@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
 '''
 Conversion to EPUB.
 '''
-import sys
+import sys, textwrap
 from calibre.utils.config import Config, StringConfig
 from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED
 from calibre.ebooks.html import config as common_config
@@ -53,9 +53,21 @@ The expression used must evaluate to a list of elements. To disable chapter dete
 use the expression "/". See the XPath Tutorial in the calibre User Manual for further
 help on using this feature.
 ''').replace('\n', ' '))
-    structure('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
-              help=_('Don\'t add detected chapters to the Table of Contents'))
-    structure('no_links_in_toc', ['--no-links-in-toc'], default=False,
-              help=_('Don\'t add links in the root HTML file to the Table of Contents'))
+    toc = c.add_group('toc',
+        _('''\
+Control the automatic generation of a Table of Contents. If an OPF file is detected
+and it specifies a Table of Contents, then that will be used rather than trying
+to auto-generate a Table of Contents.
+''').replace('\n', ' '))
+    toc('max_toc_recursion', ['--max-toc-recursion'], default=1,
+        help=_('Number of levels of HTML files to try to autodetect TOC entries from. Set to 0 to disable all TOC autodetection. Default is %default.'))
+    toc('max_toc_links', ['--max-toc-links'], default=40,
+        help=_('Maximum number of links from each HTML file to insert into the TOC. Set to 0 to disable. Default is: %default.'))
+    toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
+        help=_("Don't add auto-detected chapters to the Table of Contents."))
+    toc('add_files_to_toc', ['--add-files-to-toc'], default=False,
+        help=_('If more than one HTML file is found, create a TOC entry for each file.'))
 
 
     return c
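
A minimal sketch of how these grouped options surface once parsed. Only config() and the option names come from the hunk above; the parse_args call, the sample values and the positional argument are illustrative assumptions.

    from calibre.ebooks.epub import config

    c = config()
    parser = c.option_parser()
    opts, args = parser.parse_args(['--max-toc-links', '25',
                                    '--no-chapters-in-toc', 'book.html'])
    print opts.max_toc_links, opts.no_chapters_in_toc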
@@ -1,13 +1,16 @@
 from __future__ import with_statement
+from calibre.ebooks.metadata.opf import OPFReader
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-import os, sys, re, shutil
+import os, sys, re, shutil, cStringIO
 from lxml.etree import XPath
 
-from calibre.ebooks.html import Parser, get_text, merge_metadata, get_filelist
+from calibre.ebooks.html import Parser, get_text, merge_metadata, get_filelist,\
+    opf_traverse, create_metadata, rebase_toc
 from calibre.ebooks.epub import config as common_config
-from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.ptempfile import TemporaryDirectory
+from calibre.ebooks.metadata import MetaInformation
 
 
 class HTMLProcessor(Parser):
@@ -17,7 +20,7 @@ class HTMLProcessor(Parser):
                         name='html2epub')
         if opts.verbose > 2:
             self.debug_tree('parsed')
-        self.detected_chapters = self.opts.chapter(self.root)
+        self.detect_chapters()
         self.extract_css()
 
         if opts.verbose > 2:
@@ -27,6 +30,13 @@ class HTMLProcessor(Parser):
 
         self.split()
 
+    def detect_chapters(self):
+        self.detected_chapters = self.opts.chapter(self.root)
+        for elem in self.detected_chapters:
+            style = elem.get('style', '')
+            style += ';page-break-before: always'
+            elem.set(style, style)
+
     def collect_font_statistics(self):
         '''
         Collect font statistics to figure out the base font size used in this
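
detect_chapters() marks every element matched by the chapter XPath so that it starts on a new page. A self-contained sketch of that marking step, using plain lxml and a hard-coded //h1 expression in place of the configurable opts.chapter; note that the attribute name passed to set() is the literal string 'style'.

    from lxml import html

    root = html.fromstring(
        '<html><body><h1>One</h1><p>text</p><h1>Two</h1></body></html>')
    for elem in root.xpath('//h1'):          # stand-in for the opts.chapter XPath
        style = elem.get('style', '')
        style += ';page-break-before: always'
        elem.set('style', style)             # force a page break before each chapter
    print html.tostring(root)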
@@ -46,37 +56,44 @@ class HTMLProcessor(Parser):
 
 
 def config(defaults=None):
-    c = common_config(defaults=defaults)
-    return c
+    return common_config(defaults=defaults)
 
 def option_parser():
     c = config()
     return c.option_parser(usage=_('''\
-%prog [options] file.html
+%prog [options] file.html|opf
 
-Convert a HTML file to an EPUB ebook. Follows links in the HTML file.
+Convert a HTML file to an EPUB ebook. Recursively follows links in the HTML file.
+If you specify an OPF file instead of an HTML file, the list of links is takes from
+the <spine> element of the OPF file.
 '''))
 
-def parse_content(filelist, opts):
-    tdir = PersistentTemporaryDirectory('_html2epub')
+def parse_content(filelist, opts, tdir):
     os.makedirs(os.path.join(tdir, 'content', 'resources'))
     resource_map = {}
     for htmlfile in filelist:
         hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
                            resource_map, filelist)
+        hp.save()
+    return resource_map, hp.htmlfile_map
 
 def convert(htmlfile, opts, notification=None):
     htmlfile = os.path.abspath(htmlfile)
     if opts.output is None:
         opts.output = os.path.splitext(os.path.basename(htmlfile))[0] + '.epub'
     opts.output = os.path.abspath(opts.output)
+    if htmlfile.lower().endswith('.opf'):
+        opf = OPFReader(htmlfile, os.path.dirname(os.path.abspath(htmlfile)))
+        filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
+        mi = MetaInformation(opf)
+    else:
         opf, filelist = get_filelist(htmlfile, opts)
         mi = merge_metadata(htmlfile, opf, opts)
     opts.chapter = XPath(opts.chapter,
                          namespaces={'re':'http://exslt.org/regular-expressions'})
 
-    resource_map = parse_content(filelist, opts)
+    with TemporaryDirectory('_html2epub') as tdir:
+        resource_map, htmlfile_map = parse_content(filelist, opts, tdir)
     resources = [os.path.join(opts.output, 'content', f) for f in resource_map.values()]
 
     if opf.cover and os.access(opf.cover, os.R_OK):
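
parse_content() no longer creates its own PersistentTemporaryDirectory; the caller owns the working directory and passes it in, wrapped in a context manager so it is cleaned up even if the conversion fails. A standard-library sketch of the same pattern (calibre's TemporaryDirectory from calibre.ptempfile plays this role above; the helper below is illustrative, not calibre code):

    import os, shutil, tempfile
    from contextlib import contextmanager

    @contextmanager
    def temporary_directory(suffix=''):
        tdir = tempfile.mkdtemp(suffix=suffix)
        try:
            yield tdir
        finally:
            shutil.rmtree(tdir, ignore_errors=True)   # removed on success or error

    with temporary_directory('_html2epub') as tdir:
        os.makedirs(os.path.join(tdir, 'content', 'resources'))
        # parse_content(filelist, opts, tdir) would populate this tree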
@@ -86,6 +103,18 @@ def convert(htmlfile, opts, notification=None):
         resources.append(cpath)
         mi.cover = cpath
 
+        spine = [htmlfile_map[f.path] for f in filelist]
+        mi = create_metadata(tdir, mi, spine, resources)
+        buf = cStringIO.StringIO()
+        if mi.toc:
+            rebase_toc(mi.toc, htmlfile_map, opts.output)
+        with open(os.path.join(tdir, 'metadata.opf'), 'wb') as f:
+            mi.render(f, buf)
+        toc = buf.getvalue()
+        if toc:
+            with open(os.path.join(tdir, 'toc.ncx'), 'wb') as f:
+                f.write(toc)
+
 def main(args=sys.argv):
     parser = option_parser()
     opts, args = parser.parse_args(args)
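
Taken together, these hunks let the converter start from either an HTML file or an OPF file whose <spine> lists the content documents. A hedged usage sketch; the module path is assumed and not shown in the diff:

    from calibre.ebooks.epub.from_html import option_parser, convert  # assumed path

    parser = option_parser()
    opts, args = parser.parse_args(['mybook.opf'])
    convert('mybook.opf', opts)   # opts.output defaults to mybook.epub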
@@ -1,10 +1,14 @@
 from __future__ import with_statement
-import cStringIO
 __license__ = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import sys, re, os, shutil, logging, tempfile
+'''
+Code to recursively parse HTML files and create an open ebook in a specified
+directory or zip file. All the action starts in :function:`create_dir`.
+'''
+
+import sys, re, os, shutil, logging, tempfile, cStringIO
 from urlparse import urlparse
 from urllib import unquote
 
@@ -445,10 +449,10 @@ class Parser(PreProcessor, LoggingInterface):
         self.raw_css = '\n\n'.join(css)
         # TODO: Figure out what to do about CSS imports from linked stylesheets
 
-def config(defaults=None):
-    desc = _('Options to control the traversal of HTML')
+def config(defaults=None, config_name='html',
+           desc=_('Options to control the traversal of HTML')):
     if defaults is None:
-        c = Config('html', desc)
+        c = Config(config_name, desc)
     else:
         c = StringConfig(defaults, desc)
 
@@ -482,10 +486,12 @@ def config(defaults=None):
 def option_parser():
     c = config()
     return c.option_parser(usage=_('''\
-%prog [options] file.html
+%prog [options] file.html|opf
 
 Follow all links in an HTML file and collect them into the specified directory.
 Also collects any references resources like images, stylesheets, scripts, etc.
+If an OPF file is specified instead, the list of files in its <spine> element
+is used.
 '''))
 
 def search_for_opf(dir):
@@ -566,7 +572,8 @@ def create_metadata(basepath, mi, filelist, resources):
 
 def rebase_toc(toc, htmlfile_map, basepath, root=True):
     '''
-    Rebase a :class:`calibre.ebooks.metadata.toc.TOC` object.
+    Rebase a :class:`calibre.ebooks.metadata.toc.TOC` object. Maps all entries
+    in the TOC to point to their new locations relative to the new OPF file.
     '''
     def fix_entry(entry):
         if entry.abspath in htmlfile_map.keys():
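
The expanded docstring describes the per-entry remapping rebase_toc() performs. A simplified sketch of that fix-up (a hypothetical helper, not the calibre implementation), where htmlfile_map maps source HTML paths to their processed copies and basepath is the directory holding the new OPF file:

    import os

    def fix_entry_path(abspath, htmlfile_map, basepath):
        # Processed files point at their new copy, expressed relative to the OPF.
        if abspath in htmlfile_map:
            return os.path.relpath(htmlfile_map[abspath], basepath)
        return abspath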
@@ -582,15 +589,23 @@ def create_dir(htmlfile, opts):
     '''
     Create a directory that contains the open ebook
     '''
+    if htmlfile.lower().endswith('.opf'):
+        opf = OPFReader(open(htmlfile, 'rb'), os.path.dirname(os.path.abspath(htmlfile)))
+        filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
+        mi = MetaInformation(opf)
+    else:
         opf, filelist = get_filelist(htmlfile, opts)
         mi = merge_metadata(htmlfile, opf, opts)
 
     resource_map, htmlfile_map = parse_content(filelist, opts)
     resources = [os.path.join(opts.output, 'content', f) for f in resource_map.values()]
 
     if opf and opf.cover and os.access(opf.cover, os.R_OK):
         cpath = os.path.join(opts.output, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover)[-1])
         shutil.copyfile(opf.cover, cpath)
         resources.append(cpath)
         mi.cover = cpath
 
     spine = [htmlfile_map[f.path] for f in filelist]
     mi = create_metadata(opts.output, mi, spine, resources)
     buf = cStringIO.StringIO()
@@ -105,7 +105,6 @@ def set_metadata(stream, mi):
     reader.opf.smart_update(mi)
     newopf = StringIO(reader.opf.render())
     safe_replace(stream, reader.container[OPF.MIMETYPE], newopf)
-    print newopf.getvalue()
 
 def option_parser():
     parser = get_parser('epub')
@@ -150,7 +150,7 @@ class OPF(object):
         def fset(self, val):
             matches = self.isbn_path(self.tree)
             if not matches:
-                matches = [self.create_metadata_element('dc:identifier',
+                matches = [self.create_metadata_element('identifier', ns='dc',
                     attrib={'{%s}scheme'%self.NAMESPACES['opf']:'ISBN'})]
             matches[0].text = unicode(val)
         return property(fget=fget, fset=fset)
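
The setter now asks create_metadata_element() for an 'identifier' element in the dc namespace instead of passing the prefixed tag name. A standalone lxml sketch of the element that ends up in the OPF metadata; the namespace URIs are the standard Dublin Core and OPF 2.0 ones and the ISBN value is a placeholder:

    from lxml import etree

    DC = 'http://purl.org/dc/elements/1.1/'
    OPF = 'http://www.idpf.org/2007/opf'

    metadata = etree.Element('{%s}metadata' % OPF, nsmap={'dc': DC, 'opf': OPF})
    ident = etree.SubElement(metadata, '{%s}identifier' % DC,
                             attrib={'{%s}scheme' % OPF: 'ISBN'})
    ident.text = '9780000000000'               # placeholder ISBN
    print etree.tostring(metadata, pretty_print=True)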