mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
IGN:Various fixes to html2epub
This commit is contained in:
parent
f218e5bb87
commit
8c53abe905
@ -8,7 +8,7 @@ Conversion to EPUB.
|
|||||||
'''
|
'''
|
||||||
import sys, textwrap
|
import sys, textwrap
|
||||||
from calibre.utils.config import Config, StringConfig
|
from calibre.utils.config import Config, StringConfig
|
||||||
from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED
|
from calibre.utils.zipfile import ZipFile, ZIP_STORED
|
||||||
from calibre.ebooks.html import config as common_config
|
from calibre.ebooks.html import config as common_config
|
||||||
|
|
||||||
def initialize_container(path_to_container, opf_name='metadata.opf'):
|
def initialize_container(path_to_container, opf_name='metadata.opf'):
|
||||||
@ -24,7 +24,7 @@ def initialize_container(path_to_container, opf_name='metadata.opf'):
|
|||||||
</container>
|
</container>
|
||||||
'''%opf_name
|
'''%opf_name
|
||||||
zf = ZipFile(path_to_container, 'w')
|
zf = ZipFile(path_to_container, 'w')
|
||||||
zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_DEFLATED)
|
zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
|
||||||
zf.writestr('META-INF/', '', 0700)
|
zf.writestr('META-INF/', '', 0700)
|
||||||
zf.writestr('META-INF/container.xml', CONTAINER)
|
zf.writestr('META-INF/container.xml', CONTAINER)
|
||||||
return zf
|
return zf
|
||||||
@ -67,5 +67,7 @@ to auto-generate a Table of Contents.
|
|||||||
toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
|
toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
|
||||||
help=_("Don't add auto-detected chapters to the Table of Contents."))
|
help=_("Don't add auto-detected chapters to the Table of Contents."))
|
||||||
|
|
||||||
|
c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
|
||||||
|
help=_('Print generated OPF file to stdout'))
|
||||||
|
|
||||||
return c
|
return c
|
@ -12,6 +12,7 @@ from calibre.ebooks.epub import config as common_config
|
|||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
|
from calibre.ebooks.epub import initialize_container
|
||||||
|
|
||||||
|
|
||||||
class HTMLProcessor(Processor):
|
class HTMLProcessor(Processor):
|
||||||
@ -93,10 +94,10 @@ def convert(htmlfile, opts, notification=None):
|
|||||||
|
|
||||||
with TemporaryDirectory('_html2epub') as tdir:
|
with TemporaryDirectory('_html2epub') as tdir:
|
||||||
resource_map, htmlfile_map, generated_toc = parse_content(filelist, opts, tdir)
|
resource_map, htmlfile_map, generated_toc = parse_content(filelist, opts, tdir)
|
||||||
resources = [os.path.join(opts.output, 'content', f) for f in resource_map.values()]
|
resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]
|
||||||
|
|
||||||
if opf.cover and os.access(opf.cover, os.R_OK):
|
if mi.cover and os.access(mi.cover, os.R_OK):
|
||||||
shutil.copyfile(opf.cover, os.path.join(opts.output, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover)))
|
shutil.copyfile(mi.cover, os.path.join(opts.output, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover)))
|
||||||
cpath = os.path.join(opts.output, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover))
|
cpath = os.path.join(opts.output, 'content', 'resources', '_cover_'+os.path.splitext(opf.cover))
|
||||||
shutil.copyfile(opf.cover, cpath)
|
shutil.copyfile(opf.cover, cpath)
|
||||||
resources.append(cpath)
|
resources.append(cpath)
|
||||||
@ -109,13 +110,23 @@ def convert(htmlfile, opts, notification=None):
|
|||||||
rebase_toc(mi.toc, htmlfile_map, opts.output)
|
rebase_toc(mi.toc, htmlfile_map, opts.output)
|
||||||
if mi.toc is None or len(mi.toc) < 2:
|
if mi.toc is None or len(mi.toc) < 2:
|
||||||
mi.toc = generated_toc
|
mi.toc = generated_toc
|
||||||
|
for item in mi.manifest:
|
||||||
|
if getattr(item, 'mime_type', None) == 'text/html':
|
||||||
|
item.mime_type = 'application/xhtml+xml'
|
||||||
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as f:
|
with open(os.path.join(tdir, 'metadata.opf'), 'wb') as f:
|
||||||
mi.render(f, buf)
|
mi.render(f, buf)
|
||||||
|
if opts.show_opf:
|
||||||
|
print open(os.path.join(tdir, 'metadata.opf')).read()
|
||||||
toc = buf.getvalue()
|
toc = buf.getvalue()
|
||||||
if toc:
|
if toc:
|
||||||
with open(os.path.join(tdir, 'toc.ncx'), 'wb') as f:
|
with open(os.path.join(tdir, 'toc.ncx'), 'wb') as f:
|
||||||
f.write(toc)
|
f.write(toc)
|
||||||
|
|
||||||
|
epub = initialize_container(opts.output)
|
||||||
|
epub.add_dir(tdir)
|
||||||
|
print 'Output written to', opts.output
|
||||||
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
parser = option_parser()
|
parser = option_parser()
|
||||||
opts, args = parser.parse_args(args)
|
opts, args = parser.parse_args(args)
|
||||||
|
@ -205,7 +205,6 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
|
|||||||
hf.links.remove(link)
|
hf.links.remove(link)
|
||||||
|
|
||||||
next_level = list(nl)
|
next_level = list(nl)
|
||||||
|
|
||||||
return flat, list(depth_first(flat[0], flat))
|
return flat, list(depth_first(flat[0], flat))
|
||||||
|
|
||||||
|
|
||||||
@ -309,6 +308,7 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
self.resource_dir = os.path.join(tdir, 'resources')
|
self.resource_dir = os.path.join(tdir, 'resources')
|
||||||
save_counter = 1
|
save_counter = 1
|
||||||
self.htmlfile_map = {}
|
self.htmlfile_map = {}
|
||||||
|
self.level = self.htmlfile.level
|
||||||
for f in self.htmlfiles:
|
for f in self.htmlfiles:
|
||||||
name = os.path.basename(f.path)
|
name = os.path.basename(f.path)
|
||||||
if name in self.htmlfile_map.values():
|
if name in self.htmlfile_map.values():
|
||||||
@ -362,8 +362,8 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
tdir = tempfile.gettempdir()
|
tdir = tempfile.gettempdir()
|
||||||
if not os.path.exists(tdir):
|
if not os.path.exists(tdir):
|
||||||
os.makedirs(tdir)
|
os.makedirs(tdir)
|
||||||
with open(os.path.join(tdir, '%s-%s-%s.html'%\
|
with open(os.path.join(tdir, '%s-%s.html'%\
|
||||||
(self.name, os.path.basename(self.htmlfile.path), name)), 'wb') as f:
|
(os.path.basename(self.htmlfile.path), name)), 'wb') as f:
|
||||||
f.write(html.tostring(self.root, encoding='utf-8'))
|
f.write(html.tostring(self.root, encoding='utf-8'))
|
||||||
self.log_debug(_('Written processed HTML to ')+f.name)
|
self.log_debug(_('Written processed HTML to ')+f.name)
|
||||||
|
|
||||||
@ -381,6 +381,8 @@ class Parser(PreProcessor, LoggingInterface):
|
|||||||
return olink
|
return olink
|
||||||
if link.path in self.htmlfiles:
|
if link.path in self.htmlfiles:
|
||||||
return self.htmlfile_map[link.path]
|
return self.htmlfile_map[link.path]
|
||||||
|
if re.match(r'\.(x){0,1}htm(l){0,1}', os.path.splitext(link.path)[1]) is not None:
|
||||||
|
return olink # This happens when --max-levels is used
|
||||||
if link.path in self.resource_map.keys():
|
if link.path in self.resource_map.keys():
|
||||||
return self.resource_map[link.path]
|
return self.resource_map[link.path]
|
||||||
name = os.path.basename(link.path)
|
name = os.path.basename(link.path)
|
||||||
@ -441,14 +443,14 @@ class Processor(Parser):
|
|||||||
text = text[:50] + u'\u2026'
|
text = text[:50] + u'\u2026'
|
||||||
return target.add_item(href, fragment, text)
|
return target.add_item(href, fragment, text)
|
||||||
|
|
||||||
name = self.htmlfile_map[self.htmlfile]
|
name = self.htmlfile_map[self.htmlfile.path]
|
||||||
href = 'content/'+name
|
href = 'content/'+name
|
||||||
|
|
||||||
if referrer.href != href: # Happens for root file
|
if referrer.href != href: # Happens for root file
|
||||||
target = add_item(href, None, self.htmlfile.title, referrer)
|
target = add_item(href, None, self.htmlfile.title, referrer)
|
||||||
|
|
||||||
# Add links to TOC
|
# Add links to TOC
|
||||||
if self.opts.max_toc_links > 0:
|
if int(self.opts.max_toc_links) > 0:
|
||||||
for link in list(self.LINKS_PATH(self.root))[:self.opts.max_toc_links]:
|
for link in list(self.LINKS_PATH(self.root))[:self.opts.max_toc_links]:
|
||||||
text = (u''.join(link.xpath('string()'))).strip()
|
text = (u''.join(link.xpath('string()'))).strip()
|
||||||
if text:
|
if text:
|
||||||
@ -468,7 +470,7 @@ class Processor(Parser):
|
|||||||
for elem in getattr(self, 'detected_chapters', []):
|
for elem in getattr(self, 'detected_chapters', []):
|
||||||
text = (u''.join(elem.xpath('string()'))).strip()
|
text = (u''.join(elem.xpath('string()'))).strip()
|
||||||
if text:
|
if text:
|
||||||
name = self.htmlfile_map[self.path]
|
name = self.htmlfile_map[self.htmlfile.path]
|
||||||
href = 'content/'+name
|
href = 'content/'+name
|
||||||
add_item(href, None, text, target)
|
add_item(href, None, text, target)
|
||||||
|
|
||||||
@ -479,9 +481,9 @@ class Processor(Parser):
|
|||||||
This includes <font> tags.
|
This includes <font> tags.
|
||||||
'''
|
'''
|
||||||
counter = 0
|
counter = 0
|
||||||
def get_id(chapter, prefix='calibre_css_'):
|
|
||||||
|
def get_id(chapter, counter, prefix='calibre_css_'):
|
||||||
new_id = '%s_%d'%(prefix, counter)
|
new_id = '%s_%d'%(prefix, counter)
|
||||||
counter += 1
|
|
||||||
if chapter.tag.lower() == 'a' and 'name' in chapter.keys():
|
if chapter.tag.lower() == 'a' and 'name' in chapter.keys():
|
||||||
chapter.attrib['id'] = id = chapter.get('name')
|
chapter.attrib['id'] = id = chapter.get('name')
|
||||||
if not id:
|
if not id:
|
||||||
@ -497,14 +499,14 @@ class Processor(Parser):
|
|||||||
css = []
|
css = []
|
||||||
for link in self.root.xpath('//link'):
|
for link in self.root.xpath('//link'):
|
||||||
if 'css' in link.get('type', 'text/css').lower():
|
if 'css' in link.get('type', 'text/css').lower():
|
||||||
file = self.htmlfile.resolve(link.get('href', ''))
|
file = self.htmlfile.resolve(unicode(link.get('href', ''), self.htmlfile.encoding)).path
|
||||||
if os.path.exists(file) and os.path.isfile(file):
|
if file and os.path.exists(file) and os.path.isfile(file):
|
||||||
css.append(open(file, 'rb').read().decode('utf-8'))
|
css.append(open(file, 'rb').read().decode('utf-8'))
|
||||||
link.getparent().remove(link)
|
link.getparent().remove(link)
|
||||||
|
|
||||||
for style in self.root.xpath('//style'):
|
for style in self.root.xpath('//style'):
|
||||||
if 'css' in style.get('type', 'text/css').lower():
|
if 'css' in style.get('type', 'text/css').lower():
|
||||||
css.append('\n'.join(get_text(style)))
|
css.append('\n'.join(style.xpath('./text()')))
|
||||||
style.getparent().remove(style)
|
style.getparent().remove(style)
|
||||||
|
|
||||||
for font in self.root.xpath('//font'):
|
for font in self.root.xpath('//font'):
|
||||||
@ -519,12 +521,14 @@ class Processor(Parser):
|
|||||||
color = font.attrib.pop('color', None)
|
color = font.attrib.pop('color', None)
|
||||||
if color is not None:
|
if color is not None:
|
||||||
setting += 'color:%s'%color
|
setting += 'color:%s'%color
|
||||||
id = get_id(font)
|
id = get_id(font, counter)
|
||||||
|
counter += 1
|
||||||
css.append('#%s { %s }'%(id, setting))
|
css.append('#%s { %s }'%(id, setting))
|
||||||
|
|
||||||
for elem in self.root.xpath('//*[@style]'):
|
for elem in self.root.xpath('//*[@style]'):
|
||||||
if 'id' not in elem.keys():
|
if 'id' not in elem.keys():
|
||||||
id = get_id(elem)
|
id = get_id(elem, counter)
|
||||||
|
counter += 1
|
||||||
css.append('#%s {%s}'%(id, elem.get('style')))
|
css.append('#%s {%s}'%(id, elem.get('style')))
|
||||||
elem.attrib.pop('style')
|
elem.attrib.pop('style')
|
||||||
|
|
||||||
@ -597,7 +601,8 @@ def get_filelist(htmlfile, opts):
|
|||||||
if opf is not None:
|
if opf is not None:
|
||||||
filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
|
filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
|
||||||
if not filelist:
|
if not filelist:
|
||||||
filelist = traverse(htmlfile, verbose=opts.verbose, encoding=opts.encoding)\
|
filelist = traverse(htmlfile, max_levels=int(opts.max_levels),
|
||||||
|
verbose=opts.verbose, encoding=opts.encoding)\
|
||||||
[0 if opts.breadth_first else 1]
|
[0 if opts.breadth_first else 1]
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
print '\tFound files...'
|
print '\tFound files...'
|
||||||
|
@ -210,7 +210,7 @@ def setup_completion(fatal_errors):
|
|||||||
f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
|
f.write(opts_and_exts('comic2lrf', comicop, ['cbz', 'cbr']))
|
||||||
f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
|
f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
|
||||||
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
|
f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
|
||||||
f.write(opts_and_exts('html2epub', html2epub, ['html', 'htm', 'xhtm', 'xhtml']))
|
f.write(opts_and_exts('html2epub', html2epub, ['html', 'htm', 'xhtm', 'xhtml', 'opf']))
|
||||||
f.write(opts_and_exts('html2oeb', html2oeb, ['html', 'htm', 'xhtm', 'xhtml']))
|
f.write(opts_and_exts('html2oeb', html2oeb, ['html', 'htm', 'xhtm', 'xhtml']))
|
||||||
f.write('''
|
f.write('''
|
||||||
_prs500_ls()
|
_prs500_ls()
|
||||||
|
@ -162,6 +162,12 @@ class Option(object):
|
|||||||
self.switches = switches
|
self.switches = switches
|
||||||
self.help = help.replace('%default', repr(default)) if help else None
|
self.help = help.replace('%default', repr(default)) if help else None
|
||||||
self.type = type
|
self.type = type
|
||||||
|
if self.type is None and action is None and choices is None:
|
||||||
|
if isinstance(default, float):
|
||||||
|
self.type = 'float'
|
||||||
|
elif isinstance(default, int) and not isinstance(default, bool):
|
||||||
|
self.type = 'int'
|
||||||
|
|
||||||
self.choices = choices
|
self.choices = choices
|
||||||
self.check = check
|
self.check = check
|
||||||
self.group = group
|
self.group = group
|
||||||
@ -229,7 +235,7 @@ class OptionSet(object):
|
|||||||
option will not be added to the command line parser.
|
option will not be added to the command line parser.
|
||||||
:param help: Help text.
|
:param help: Help text.
|
||||||
:param type: Type checking of option values. Supported types are:
|
:param type: Type checking of option values. Supported types are:
|
||||||
`None, 'choice', 'complex', 'float', 'int', 'long', 'string'`.
|
`None, 'choice', 'complex', 'float', 'int', 'string'`.
|
||||||
:param choices: List of strings or `None`.
|
:param choices: List of strings or `None`.
|
||||||
:param group: Group this option belongs to. You must previously
|
:param group: Group this option belongs to. You must previously
|
||||||
have created this group with a call to :method:`add_group`.
|
have created this group with a call to :method:`add_group`.
|
||||||
@ -289,7 +295,11 @@ class OptionSet(object):
|
|||||||
exec src in options
|
exec src in options
|
||||||
opts = OptionValues()
|
opts = OptionValues()
|
||||||
for pref in self.preferences:
|
for pref in self.preferences:
|
||||||
setattr(opts, pref.name, options.get(pref.name, pref.default))
|
val = options.get(pref.name, pref.default)
|
||||||
|
formatter = __builtins__.get(pref.type, None)
|
||||||
|
if callable(formatter):
|
||||||
|
val = formatter(val)
|
||||||
|
setattr(opts, pref.name, val)
|
||||||
|
|
||||||
return opts
|
return opts
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user