HTML Output: Convert the ebook to a collection of HTML files suitable for displaying as a website

This commit is contained in:
Kovid Goyal 2010-11-02 13:10:32 -06:00
commit 8367f875a5
8 changed files with 518 additions and 3 deletions

View File

@ -0,0 +1,60 @@
/* Default stylesheet for the pages written by the HTML output plugin. */

/* Page-wide defaults */
body {
    margin: 0px;
    padding: 0.5em;
    background-color: #F6F3E9;
    font-size: 12px;
    font-family: Arial, Helvetica, sans-serif;
}

/* Header bar holding the book titles and authors */
.calibreMeta {
    background-color: #39322B;
    color: white;
    padding: 10px;
}

/* Links placed on the dark header/navigation/TOC backgrounds */
.calibreMeta a, .calibreEbNav a, .calibreEbNavTop a, .calibreToc a {
    color: white;
}

.calibreMeta h1 {
    margin: 0px;
    font-size: 18px;
    background-color: #39322B;
}

/* Book text column (spacing; size and float are set further below) */
.calibreEbookContent {
    padding: 20px;
}

/* Previous/next navigation bars (top and bottom) */
.calibreEbNav, .calibreEbNavTop {
    clear: both;
    background-color: #39322B;
    color: white;
    padding: 10px;
    text-align: center;
}

.calibreEbNavTop {
    margin-bottom: 20px;
}

.calibreEbNav a, .calibreEbNavTop a {
    padding: 0px 5px;
}

/* Table of contents shown on the index page */
.calibreTocIndex {
    line-height: 18px;
}

/* Table of contents box shown beside the book text */
.calibreToc {
    float: left;
    margin: 20px;
    width: 300px;
    background-color: #39322B;
    color: white;
    padding: 10px;
}

/* Book text column (size and float) */
.calibreEbookContent {
    width: 600px;
    float: left;
}

View File

@ -0,0 +1,74 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
${head_content}$
<link href="${cssLink}$" type="text/css" rel="stylesheet" />
</head>
<body>
<div class="calibreMeta">
<div class="calibreMetaTitle">
${pos1=1}$
${for title in meta.titles():}$
${if pos1:}$
<h1>
<a href="${tocUrl}$">${print title}$</a>
</h1>
${:else:}$
<div class="calibreMetaSubtitle">${print title}$</div>
${:endif}$
${pos1=0}$
${:endfor}$
</div>
<div class="calibreMetaAuthor">
${print ', '.join(meta.creators())}$
</div>
</div>
<div class="calibreMain">
<div class="calibreEbookContent">
${if prevLink or nextLink:}$
<div class="calibreEbNavTop">
${if prevLink:}$
<a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
${:else:}$
<a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
${:endif}$
${if nextLink:}$
<a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
${:endif}$
</div>
${:endif}$
${ebookContent}$
</div>
${if has_toc:}$
<div class="calibreToc">
<h2><a href="${tocUrl}$">${print _('Table of contents'),}$</a></h2>
${print toc()}$
</div>
${:endif}$
<div class="calibreEbNav">
${if prevLink:}$
<a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
${:else:}$
<a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
${:endif}$
<a href="${tocUrl}$" class="calibreAHome">${print _('start'),}$</a>
${if nextLink:}$
<a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
${:endif}$
</div>
</div>
</body>
</html>

View File

@ -0,0 +1,61 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
<link rel="schema.DCTERMS" href="http://purl.org/dc/terms/" />
<title>${print ', '.join(meta.creators()),}$ - ${print meta.titles().next(); meta.titles().close()}$</title>
${for item in meta:}$
<meta ${print 'name="DC.'+item['name']+'"',}$ ${print 'content="'+item['value']+'"',}$ />
${:endfor}$
<link href="${cssLink}$" type="text/css" rel="stylesheet" />
</head>
<body>
<div class="calibreMeta">
<div class="calibreMetaTitle">
${pos1=1}$
${for title in meta.titles():}$
${if pos1:}$
<h1>
<a href="${tocUrl}$">${print title}$</a>
</h1>
${:else:}$
<div class="calibreMetaSubtitle">${print title}$</div>
${:endif}$
${pos1=0}$
${:endfor}$
</div>
<div class="calibreMetaAuthor">
${print ', '.join(meta.creators()),}$
</div>
</div>
<div class="calibreMain">
<div class="calibreEbookContent">
${if has_toc:}$
<div class="calibreTocIndex">
<h2>${print _('Table of contents'),}$</h2>
${toc}$
</div>
${:else:}$
<h2>${print _('No table of contents present'),}$</h2>
<div><strong><a href="${nextLink}$">${print _('begin to read'),}$</a></strong></div>
${:endif}$
</div>
<div class="calibreEbNav">
${if nextLink:}$
<a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
${:endif}$
</div>
</div>
</body>
</html>

View File

@ -446,6 +446,7 @@ from calibre.ebooks.rb.output import RBOutput
from calibre.ebooks.rtf.output import RTFOutput
from calibre.ebooks.tcr.output import TCROutput
from calibre.ebooks.txt.output import TXTOutput
from calibre.ebooks.html.output import HTMLOutput
from calibre.ebooks.snb.output import SNBOutput
from calibre.customize.profiles import input_profiles, output_profiles
@ -525,6 +526,7 @@ plugins += [
RTFOutput,
TCROutput,
TXTOutput,
HTMLOutput,
SNBOutput,
]
# Order here matters. The first matched device is the one used.
@ -893,4 +895,3 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, InputOptions,
Email, Server, Plugins, Tweaks, Misc]
#}}}

View File

@ -0,0 +1,33 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.oeb.base import namespace, barename, DC11_NS
class EasyMeta(object):
    '''
    Convenience wrapper around a metadata object.

    Iterating over an instance yields one ``{'name': ..., 'value': ...}``
    dict for every metadata item whose term lies in the ``DC11_NS``
    namespace. ``titles()`` and ``creators()`` yield the values stored
    under the ``'title'`` and ``'creator'`` keys respectively.
    '''

    def __init__(self, meta):
        # The wrapped object must expose ``items`` (an iterable of item names)
        # and support ``meta[name]``, returning objects with ``term``/``value``.
        self.meta = meta

    def __iter__(self):
        '''Yield a name/value dict for every item in the DC11_NS namespace.'''
        wrapped = self.meta
        for key in wrapped.items:
            for entry in wrapped[key]:
                if namespace(entry.term) != DC11_NS:
                    continue
                yield {'name': barename(entry.term), 'value': entry.value}

    def __len__(self):
        '''Return how many items iteration over this object yields.'''
        return sum(1 for entry in self)

    def titles(self):
        '''Yield the value of every entry stored under ``'title'``.'''
        for entry in self.meta['title']:
            yield entry.value

    def creators(self):
        '''Yield the value of every entry stored under ``'creator'``.'''
        for entry in self.meta['creator']:
            yield entry.value

View File

@ -0,0 +1,201 @@
from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
__docformat__ = 'restructuredtext en'
import os, re, shutil
from os.path import dirname, abspath, relpath, exists
from lxml import etree
from templite import Templite
from calibre.ebooks.oeb.base import element
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre import CurrentDir
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.utils.zipfile import ZipFile
from urllib import unquote
from calibre.ebooks.html.meta import EasyMeta
class HTMLOutput(OutputFormatPlugin):
    '''
    Output plugin that renders an OEB book as a set of linked HTML pages
    (one index page plus one page per spine item) and stores the result
    in a ZIP file.
    '''

    name = 'HTML Output'
    author = 'Fabian Grassl'
    file_type = 'zip'

    options = set([
        OptionRecommendation(name='template_css',
            help=_('CSS file used for the output instead of the default file')),

        OptionRecommendation(name='template_html_index',
            help=_('Template used for generation of the html index file instead of the default file')),

        OptionRecommendation(name='template_html',
            help=_('Template used for the generation of the html contents of the book instead of the default file')),

        OptionRecommendation(name='extract_to',
            help=_('Extract the contents of the generated ZIP file to the '
                'specified directory. WARNING: The contents of the directory '
                'will be deleted.')
        ),
    ])

    recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])

    # Patterns used to unwrap serialised <head>/<body> elements. They match
    # only the tag itself ([^>]*), so content sharing a line with the tag is
    # kept, and \b stops them from matching e.g. <header>.
    _HEAD_TAG_RE = re.compile(r'</?head\b[^>]*>')
    _BODY_TAG_RE = re.compile(r'</?body\b[^>]*>')
    # Non-greedy so that markup located between two <style> elements survives.
    _STYLE_RE = re.compile(r'<style.*?/style>', re.M|re.S)
    _XHTML_NS = {'h': 'http://www.w3.org/1999/xhtml'}

    def generate_toc(self, oeb_book, ref_url, output_dir):
        '''
        Generate table of contents

        Builds a ``<div>`` element containing nested ``<ul>``/``<li>`` lists
        that mirror ``oeb_book.toc``. Every entry links to its target with a
        path relative to the directory of ``ref_url``. Paths are resolved
        with ``output_dir`` as the current directory.
        '''
        with CurrentDir(output_dir):
            def build_node(current_node, parent=None):
                if parent is None:
                    # top level call: create the root list
                    parent = etree.Element('ul')
                elif len(current_node.nodes):
                    # nested level: only open a sub list if there are children
                    parent = element(parent, ('ul'))
                for node in current_node.nodes:
                    point = element(parent, 'li')
                    href = relpath(abspath(unquote(node.href)), dirname(ref_url))
                    link = element(point, 'a', href=href)
                    title = node.title
                    if title:
                        # collapse runs of whitespace into single spaces
                        title = re.sub(r'\s+', ' ', title)
                    link.text = title
                    build_node(node, point)
                return parent
            wrap = etree.Element('div')
            wrap.append(build_node(oeb_book.toc))
            return wrap

    def generate_html_toc(self, oeb_book, ref_url, output_dir):
        '''
        Return the table of contents built by :meth:`generate_toc` serialised
        as UTF-8 encoded, pretty printed markup.
        '''
        root = self.generate_toc(oeb_book, ref_url, output_dir)
        # The markup is embedded in the middle of the rendered pages, where an
        # XML declaration is not allowed, so none is emitted.
        return etree.tostring(root, pretty_print=True, encoding='utf-8',
                xml_declaration=False)

    def _read_template(self, path, default_resource):
        '''
        Return the decoded (UTF-8) text of the file at ``path`` or, when
        ``path`` is None, of the bundled resource ``default_resource``.
        '''
        if path is not None:
            with open(path, 'rb') as f:
                data = f.read()
        else:
            data = P(default_resource, data=True)
        return data.decode('utf-8')

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Render ``oeb_book`` into a temporary directory and archive the result
        at ``output_path``.

        An index page is rendered from the index template, every manifest
        item is written below a ``<name>_files`` directory and every spine
        item is re-rendered through the page template. If ``opts.extract_to``
        is set, that directory is removed, recreated and the archive is
        extracted into it. The temporary directory is removed afterwards,
        also when the conversion fails.
        '''
        # read template files
        template_html_index_data = self._read_template(
                opts.template_html_index,
                'templates/html_export_default_index.tmpl')
        template_html_data = self._read_template(
                opts.template_html, 'templates/html_export_default.tmpl')
        template_css_data = self._read_template(
                opts.template_css, 'templates/html_export_default.css')

        self.log = log
        self.opts = opts
        meta = EasyMeta(oeb_book.metadata)

        tempdir = PersistentTemporaryDirectory()
        try:
            # Strip only a trailing '.zip' from the file name, never a '.zip'
            # occurring elsewhere in the path.
            base = os.path.basename(output_path)
            if base.endswith('.zip'):
                base = base[:-len('.zip')]
            output_file = os.path.join(tempdir, base+'.html')
            output_dir = os.path.join(tempdir, base+'_files')
            if not exists(output_dir):
                os.makedirs(output_dir)

            css_path = output_dir+os.sep+'calibreHtmlOutBasicCss.css'
            with open(css_path, 'wb') as f:
                f.write(template_css_data.encode('utf-8'))

            has_toc = bool(oeb_book.toc.count())

            # render the index page, then write it
            index_dir = dirname(output_file)
            html_toc = self.generate_html_toc(oeb_book, output_file, output_dir)
            nextLink = oeb_book.spine[0].href
            nextLink = relpath(output_dir+os.sep+nextLink, index_dir)
            cssLink = relpath(abspath(css_path), index_dir)
            tocUrl = relpath(output_file, index_dir)
            index_html = Templite(template_html_index_data).render(
                    has_toc=has_toc, toc=html_toc, meta=meta,
                    nextLink=nextLink, tocUrl=tocUrl, cssLink=cssLink)
            with open(output_file, 'wb') as f:
                f.write(index_html)

            with CurrentDir(output_dir):
                for item in oeb_book.manifest:
                    path = abspath(unquote(item.href))
                    item_dir = dirname(path)
                    if not exists(item_dir):
                        os.makedirs(item_dir)
                    if item.spine_position is not None:
                        # Spine items are only created empty here; their
                        # content is written by the spine loop below.
                        with open(path, 'wb') as f:
                            pass
                    else:
                        with open(path, 'wb') as f:
                            f.write(str(item))
                        item.unload_data_from_memory(memory=path)

                # the page template is the same for all pages: compile it once
                page_template = Templite(template_html_data)
                spine_length = len(oeb_book.spine)

                for item in oeb_book.spine:
                    path = abspath(unquote(item.href))
                    item_dir = dirname(path)
                    root = item.data.getroottree()

                    # get & clean HTML <HEAD>-data
                    head = root.xpath('//h:head', namespaces=self._XHTML_NS)[0]
                    head_content = etree.tostring(head, pretty_print=True,
                            encoding='utf-8')
                    head_content = self._HEAD_TAG_RE.sub('', head_content)
                    head_content = self._STYLE_RE.sub('', head_content)

                    # get & clean HTML <BODY>-data
                    body = root.xpath('//h:body', namespaces=self._XHTML_NS)[0]
                    ebook_content = etree.tostring(body, pretty_print=True,
                            encoding='utf-8')
                    ebook_content = self._BODY_TAG_RE.sub('', ebook_content)

                    # generate link to next page
                    if item.spine_position+1 < spine_length:
                        nextLink = oeb_book.spine[item.spine_position+1].href
                        nextLink = relpath(abspath(nextLink), item_dir)
                    else:
                        nextLink = None

                    # generate link to previous page
                    if item.spine_position > 0:
                        prevLink = oeb_book.spine[item.spine_position-1].href
                        prevLink = relpath(abspath(prevLink), item_dir)
                    else:
                        prevLink = None

                    cssLink = relpath(abspath(css_path), item_dir)
                    tocUrl = relpath(output_file, item_dir)

                    # render template; toc is passed as a callable and is only
                    # invoked by the template during this render() call
                    toc = lambda: self.generate_html_toc(oeb_book, path, output_dir)
                    t = page_template.render(ebookContent=ebook_content,
                            prevLink=prevLink, nextLink=nextLink,
                            has_toc=has_toc, toc=toc,
                            tocUrl=tocUrl, head_content=head_content,
                            meta=meta, cssLink=cssLink)

                    # write html to file
                    with open(path, 'wb') as f:
                        f.write(t)
                    item.unload_data_from_memory(memory=path)

            zfile = ZipFile(output_path, "w")
            try:
                # NOTE(review): only output_dir is archived; the index page
                # written to output_file is not added to the ZIP here --
                # confirm whether that is intended.
                zfile.add_dir(output_dir)

                if opts.extract_to:
                    if os.path.exists(opts.extract_to):
                        shutil.rmtree(opts.extract_to)
                    os.makedirs(opts.extract_to)
                    # NOTE(review): extraction happens while the archive is
                    # still open for writing -- verify ZipFile supports this.
                    zfile.extractall(opts.extract_to)
                    self.log('Zip file extracted to', opts.extract_to)
            finally:
                zfile.close()
        finally:
            # cleanup temp dir
            shutil.rmtree(tempdir)

View File

@ -49,5 +49,3 @@ class OEBOutput(OutputFormatPlugin):
with open(path, 'wb') as f:
f.write(str(item))
item.unload_data_from_memory(memory=path)

87
src/templite/__init__.py Normal file
View File

@ -0,0 +1,87 @@
#!/usr/bin/env python
#
# Templite+
# A light-weight, fully functional, general purpose templating engine
#
# Copyright (c) 2009 joonis new media
# Author: Thimo Kraemer <thimo.kraemer@joonis.de>
#
# Based on Templite - Tomer Filiba
# http://code.activestate.com/recipes/496702/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
import sys, re
class Templite(object):
    '''
    Light-weight templating engine.

    Text between the ``start`` and ``end`` delimiters is treated as Python
    code, everything else is emitted verbatim. A code part ending in ``:``
    opens a block; a part starting with ``:`` closes the innermost block
    (``${:else:}$`` closes one block and opens the next). A code part that
    is a plain name/index expression or starts with a quote is emitted
    automatically. While rendering, ``sys.stdout`` is redirected into the
    output, so ``print`` inside a template writes to the result.
    '''

    # Code parts matching this pattern are wrapped in emit(...) automatically.
    auto_emit = re.compile(r'''(^['"])|(^[a-zA-Z0-9_\[\]'"]+$)''')

    def __init__(self, template, start='${', end='}$'):
        '''
        Compile ``template`` into a code object.

        Raises ValueError if a delimiter is not exactly two characters long
        and SyntaxError if blocks are closed without being opened or are
        left unterminated.
        '''
        if len(start) != 2 or len(end) != 2:
            raise ValueError('each delimiter must be two characters long')
        delimiter = re.compile('%s(.*?)%s' % (re.escape(start), re.escape(end)), re.DOTALL)
        # A delimiter written with a backslash between its two characters is
        # an escaped, literal delimiter.
        escaped_start = '\\'.join(start)
        escaped_end = '\\'.join(end)
        offset = 0  # current block nesting depth (number of leading tabs)
        tokens = []
        for i, part in enumerate(delimiter.split(template)):
            part = part.replace(escaped_start, start)
            part = part.replace(escaped_end, end)
            if i % 2 == 0:
                # literal text between two code parts
                if not part: continue
                part = part.replace('\\', '\\\\').replace('"', '\\"')
                part = '\t' * offset + 'emit("""%s""")' % part
            else:
                # code part
                part = part.rstrip()
                if not part: continue
                if part.lstrip().startswith(':'):
                    if not offset:
                        raise SyntaxError('no block statement to terminate: %s%s%s'
                                % (start, part, end))
                    offset -= 1
                    part = part.lstrip()[1:]
                    # a pure terminator such as ":endif" generates no code
                    if not part.endswith(':'): continue
                elif self.auto_emit.match(part.lstrip()):
                    part = 'emit(%s)' % part.lstrip()
                lines = part.splitlines()
                # strip the common indentation, then indent to the block depth
                margin = min(len(l) - len(l.lstrip()) for l in lines if l.strip())
                part = '\n'.join('\t' * offset + l[margin:] for l in lines)
                if part.endswith(':'):
                    offset += 1
            tokens.append(part)
        if offset:
            raise SyntaxError('%i block statement(s) not terminated' % offset)
        self.__code = compile('\n'.join(tokens), '<templite %r>' % template[:20], 'exec')

    def render(self, __namespace=None, **kw):
        """
        renders the template according to the given namespace.
        __namespace - a dictionary serving as a namespace for evaluation
        **kw - keyword arguments which are added to the namespace

        Returns the rendered text. Exceptions raised by template code
        propagate to the caller; sys.stdout is restored in every case.
        """
        namespace = {}
        if __namespace: namespace.update(__namespace)
        if kw: namespace.update(kw)
        namespace['emit'] = self.write

        __stdout = sys.stdout
        sys.stdout = self
        self.__output = []
        try:
            eval(self.__code, namespace)
        finally:
            # never leave stdout hijacked, even if the template raised
            sys.stdout = __stdout
        return ''.join(self.__output)

    def write(self, *args):
        '''Append the string form of every argument to the current output.'''
        for a in args:
            self.__output.append(str(a))