HTML Output: Convert the ebook to a collection of HTML files suitable for displaying as a website

2025-07-09 03:04:10 -04:00 · 2010-11-02 13:10:32 -06:00 · 2010-11-02 13:10:32 -06:00 · 8367f875a5
commit 8367f875a5
parent 4ee2991358 677d1f3441
8 changed files with 518 additions and 3 deletions
--- a/resources/templates/html_export_default.css
+++ b/resources/templates/html_export_default.css
@ -0,0 +1,60 @@
+body{
+  margin:0px;
+  padding: 0.5em;
+  background-color:#F6F3E9;
+  font-size:12px;
+  font-family:Arial, Helvetica, sans-serif;
+}
+
+.calibreMeta{
+  background-color:#39322B;
+  color:white;
+  padding:10px;
+}
+
+.calibreMeta a, .calibreEbNav a, .calibreEbNavTop a, .calibreToc a{
+  color:white;
+}
+
+.calibreMeta h1{
+  margin:0px;
+  font-size:18px;
+  background-color:#39322B;
+}
+
+.calibreEbookContent{
+  padding:20px;
+}
+
+.calibreEbNav, .calibreEbNavTop{
+  clear:both;
+  background-color:#39322B;
+  color:white;
+  padding:10px;
+  text-align:center;
+}
+
+.calibreEbNavTop{
+  margin-bottom:20px;
+}
+
+.calibreEbNav a, .calibreEbNavTop a{
+  padding:0px 5px;
+}
+
+.calibreTocIndex{
+  line-height:18px;
+}
+
+.calibreToc{
+  float:left;
+  margin:20px;
+  width:300px;
+  background-color:#39322B;
+  color:white;
+  padding:10px;
+}
+.calibreEbookContent{
+  width:600px;
+  float:left;
+}
--- a/resources/templates/html_export_default.tmpl
+++ b/resources/templates/html_export_default.tmpl
@ -0,0 +1,74 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+${head_content}$
+
+<link href="${cssLink}$" type="text/css" rel="stylesheet" />
+
+</head>
+<body>
+
+<div class="calibreMeta">
+  <div class="calibreMetaTitle">
+  ${pos1=1}$
+  ${for title in meta.titles():}$
+    ${if pos1:}$
+    <h1>
+      <a href="${tocUrl}$">${print title}$</a>
+    </h1>
+    ${:else:}$
+    <div class="calibreMetaSubtitle">${print title}$</div>
+    ${:endif}$
+    ${pos1=0}$
+  ${:endfor}$
+  </div>
+  <div class="calibreMetaAuthor">
+    ${print ', '.join(meta.creators())}$
+  </div>
+</div>
+
+<div class="calibreMain">
+
+  <div class="calibreEbookContent">
+    ${if prevLink or nextLink:}$
+      <div class="calibreEbNavTop">
+        ${if prevLink:}$
+          <a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
+        ${:else:}$
+          <a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
+        ${:endif}$
+
+        ${if nextLink:}$
+          <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
+        ${:endif}$
+      </div>
+    ${:endif}$
+
+    ${ebookContent}$
+  </div>
+
+  ${if has_toc:}$
+  <div class="calibreToc">
+    <h2><a href="${tocUrl}$">${print _('Table of contents'),}$</a></h2>
+    ${print toc()}$
+  </div>
+  ${:endif}$
+
+  <div class="calibreEbNav">
+    ${if prevLink:}$
+      <a href="${prevLink}$" class="calibreAPrev">${print _('previous page'),}$</a>
+    ${:else:}$
+      <a href="${tocUrl}$" class="calibreAPrev">${print _('previous page'),}$</a>
+    ${:endif}$
+
+    <a href="${tocUrl}$" class="calibreAHome">${print _('start'),}$</a>
+
+    ${if nextLink:}$
+      <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
+    ${:endif}$
+  </div>
+
+</div>
+
+</body>
+</html>
--- a/resources/templates/html_export_default_index.tmpl
+++ b/resources/templates/html_export_default_index.tmpl
@ -0,0 +1,61 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+
+<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" />
+<link rel="schema.DCTERMS" href="http://purl.org/dc/terms/" />
+
+<title>${print ', '.join(meta.creators()),}$ - ${print meta.titles().next(); meta.titles().close()}$</title>
+
+${for item in meta:}$
+  <meta ${print 'name="DC.'+item['name']+'"',}$ ${print 'content="'+item['value']+'"',}$ />
+${:endfor}$
+
+<link href="${cssLink}$" type="text/css" rel="stylesheet" />
+</head>
+<body>
+
+<div class="calibreMeta">
+  <div class="calibreMetaTitle">
+  ${pos1=1}$
+  ${for title in meta.titles():}$
+    ${if pos1:}$
+    <h1>
+      <a href="${tocUrl}$">${print title}$</a>
+    </h1>
+    ${:else:}$
+    <div class="calibreMetaSubtitle">${print title}$</div>
+    ${:endif}$
+    ${pos1=0}$
+  ${:endfor}$
+  </div>
+  <div class="calibreMetaAuthor">
+    ${print ', '.join(meta.creators()),}$
+  </div>
+</div>
+
+<div class="calibreMain">
+  <div class="calibreEbookContent">
+
+    ${if has_toc:}$
+      <div class="calibreTocIndex">
+        <h2>${print _('Table of contents'),}$</h2>
+        ${toc}$
+      </div>
+    ${:else:}$
+        <h2>${print _('No table of contents present'),}$</h2>
+        <div><strong><a href="${nextLink}$">${print _('begin to read'),}$</a></strong></div>
+    ${:endif}$
+
+  </div>
+
+  <div class="calibreEbNav">
+    ${if nextLink:}$
+      <a href="${nextLink}$" class="calibreANext">${print _('next page'),}$</a>
+    ${:endif}$
+  </div>
+</div>
+
+</body>
+</html>
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -446,6 +446,7 @@ from calibre.ebooks.rb.output import RBOutput
 from calibre.ebooks.rtf.output import RTFOutput
 from calibre.ebooks.tcr.output import TCROutput
 from calibre.ebooks.txt.output import TXTOutput
+from calibre.ebooks.html.output import HTMLOutput
 from calibre.ebooks.snb.output import SNBOutput

 from calibre.customize.profiles import input_profiles, output_profiles
@ -525,6 +526,7 @@ plugins += [
    RTFOutput,
    TCROutput,
    TXTOutput,
+    HTMLOutput,
    SNBOutput,
 ]
 # Order here matters. The first matched device is the one used.
@ -893,4 +895,3 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, InputOptions,
        Email, Server, Plugins, Tweaks, Misc]

 #}}}
-
--- a/src/calibre/ebooks/html/meta.py
+++ b/src/calibre/ebooks/html/meta.py
@ -0,0 +1,33 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
+__docformat__ = 'restructuredtext en'
+
+
+from calibre.ebooks.oeb.base import namespace, barename, DC11_NS
+
+class EasyMeta(object):
+    '''
+    Small convenience wrapper around a metadata object for use in the
+    HTML export templates.
+
+    Iterating an instance yields one ``{'name': ..., 'value': ...}`` dict
+    for every metadata entry whose term belongs to the DC11_NS namespace.
+    '''
+
+    def __init__(self, meta):
+        # The wrapped metadata object: indexable by term name and exposing
+        # an iterable ``items`` attribute of term names.
+        self.meta = meta
+
+    def __iter__(self):
+        source = self.meta
+        for term_name in source.items:
+            for entry in source[term_name]:
+                # Skip everything outside the DC11_NS namespace.
+                if namespace(entry.term) != DC11_NS:
+                    continue
+                yield { 'name': barename(entry.term), 'value': entry.value }
+
+    def __len__(self):
+        # Number of entries that iterating this object would yield.
+        return sum(1 for unused in self)
+
+    def titles(self):
+        '''Yield the value of every 'title' entry of the wrapped metadata.'''
+        for entry in self.meta['title']:
+            yield entry.value
+
+    def creators(self):
+        '''Yield the value of every 'creator' entry of the wrapped metadata.'''
+        for entry in self.meta['creator']:
+            yield entry.value
--- a/src/calibre/ebooks/html/output.py
+++ b/src/calibre/ebooks/html/output.py
@ -0,0 +1,201 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
+__docformat__ = 'restructuredtext en'
+
+import os, re, shutil
+
+from os.path import dirname, abspath, relpath, exists
+
+from lxml import etree
+from templite import Templite
+
+from calibre.ebooks.oeb.base import element
+from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
+from calibre import CurrentDir
+from calibre.ptempfile import PersistentTemporaryDirectory
+from calibre.utils.zipfile import ZipFile
+
+from urllib import unquote
+
+from calibre.ebooks.html.meta import EasyMeta
+
+class HTMLOutput(OutputFormatPlugin):
+    '''
+    Output plugin that renders the book as templated HTML pages (one index
+    page plus one page per spine item) and stores the result in a ZIP file.
+
+    The templates are rendered with Templite; the page template, the index
+    template and the stylesheet can each be overridden through options.
+    '''
+
+    name = 'HTML Output'
+    author = 'Fabian Grassl'
+    file_type = 'zip'
+
+    options = set([
+        OptionRecommendation(name='template_css',
+            help=_('CSS file used for the output instead of the default file')),
+
+        OptionRecommendation(name='template_html_index',
+            help=_('Template used for generation of the html index file instead of the default file')),
+
+        OptionRecommendation(name='template_html',
+            help=_('Template used for the generation of the html contents of the book instead of the default file')),
+
+        OptionRecommendation(name='extract_to',
+            help=_('Extract the contents of the generated ZIP file to the directory of the generated ZIP file')
+        ),
+    ])
+
+    recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
+
+    def generate_toc(self, oeb_book, ref_url, output_dir):
+        '''
+        Build the table of contents as an lxml element tree.
+
+        :param oeb_book: book whose ``toc`` node tree is rendered
+        :param ref_url: path of the page the TOC will be embedded in; all
+                        links are made relative to its directory
+        :param output_dir: directory the book files are written to
+        :return: a ``div`` element wrapping nested ``ul``/``li``/``a`` elements
+        '''
+        # Entered so that abspath() resolves the TOC hrefs against
+        # output_dir (assumes CurrentDir switches the working directory).
+        with CurrentDir(output_dir):
+            def build_node(current_node, parent=None):
+                if parent is None:
+                    # Top level call: create the root list.
+                    parent = etree.Element('ul')
+                elif len(current_node.nodes):
+                    # Nested entries get their own list below the parent <li>.
+                    parent = element(parent, 'ul')
+                for node in current_node.nodes:
+                    point = element(parent, 'li')
+                    href = relpath(abspath(unquote(node.href)), dirname(ref_url))
+                    link = element(point, 'a', href=href)
+                    title = node.title
+                    if title:
+                        # Collapse runs of whitespace (including newlines).
+                        title = re.sub(r'\s+', ' ', title)
+                    link.text = title
+                    build_node(node, point)
+                return parent
+            wrap = etree.Element('div')
+            wrap.append(build_node(oeb_book.toc))
+            return wrap
+
+    def generate_html_toc(self, oeb_book, ref_url, output_dir):
+        '''
+        Return the table of contents serialized as UTF-8 encoded markup.
+
+        The markup is embedded inside other HTML pages, so no XML
+        declaration is emitted: a ``<?xml ...?>`` header is only valid at
+        the very start of a document.
+        '''
+        root = self.generate_toc(oeb_book, ref_url, output_dir)
+        return etree.tostring(root, pretty_print=True, encoding='utf-8',
+                xml_declaration=False)
+
+    def _read_template(self, override_path, default_resource):
+        '''
+        Return template text decoded from UTF-8, read from ``override_path``
+        when it is not None and from the bundled resource otherwise.
+        '''
+        if override_path is not None:
+            # Context manager so the file handle is closed deterministically.
+            with open(override_path, 'rb') as f:
+                raw = f.read()
+        else:
+            raw = P(default_resource, data=True)
+        return raw.decode('utf-8')
+
+    def convert(self, oeb_book, output_path, input_plugin, opts, log):
+        '''
+        Render ``oeb_book`` to HTML pages and write them to the ZIP file
+        ``output_path``.
+
+        The pages are first generated in a temporary directory as
+        ``<name>.html`` (index) and ``<name>_files/`` (pages, stylesheet and
+        all other manifest items), then packed into the ZIP. If
+        ``opts.extract_to`` is set, that directory is deleted if it exists,
+        recreated, and the ZIP content is extracted into it.
+
+        :param oeb_book: the book to convert
+        :param output_path: path of the ZIP file to create
+        :param input_plugin: unused here
+        :param opts: conversion options (template_css, template_html,
+                     template_html_index, extract_to)
+        :param log: log callable, stored on the plugin as ``self.log``
+        '''
+        # read template files
+        template_html_index_data = self._read_template(
+                opts.template_html_index,
+                'templates/html_export_default_index.tmpl')
+        template_html_data = self._read_template(
+                opts.template_html, 'templates/html_export_default.tmpl')
+        template_css_data = self._read_template(
+                opts.template_css, 'templates/html_export_default.css')
+
+        self.log  = log
+        self.opts = opts
+        meta = EasyMeta(oeb_book.metadata)
+        has_toc = bool(oeb_book.toc.count())
+
+        # Anchored/bounded patterns: the tag patterns stop at the first '>'
+        # so they cannot swallow content that shares a line with the tag,
+        # and the style pattern is non-greedy so markup between two
+        # separate <style> blocks survives.
+        head_tag_re = re.compile(r'</?head\b[^>]*>')
+        body_tag_re = re.compile(r'</?body\b[^>]*>')
+        style_re = re.compile(r'<style.*?/style>', re.M|re.S)
+        xhtml_ns = {'h': 'http://www.w3.org/1999/xhtml'}
+
+        tempdir = PersistentTemporaryDirectory()
+        try:
+            # Only a trailing ".zip" is an extension; do not touch ".zip"
+            # occurring elsewhere in the name.
+            base_name = os.path.basename(re.sub(r'\.zip$', '', output_path))
+            output_file = os.path.join(tempdir, base_name+'.html')
+            output_dir = os.path.join(tempdir, base_name+'_files')
+
+            if not exists(output_dir):
+                os.makedirs(output_dir)
+
+            css_path = output_dir+os.sep+'calibreHtmlOutBasicCss.css'
+            with open(css_path, 'wb') as f:
+                f.write(template_css_data.encode('utf-8'))
+
+            # index page
+            html_toc = self.generate_html_toc(oeb_book, output_file, output_dir)
+            nextLink = oeb_book.spine[0].href
+            nextLink = relpath(output_dir+os.sep+nextLink, dirname(output_file))
+            cssLink = relpath(abspath(css_path), dirname(output_file))
+            tocUrl = relpath(output_file, dirname(output_file))
+            t = Templite(template_html_index_data).render(has_toc=has_toc,
+                    toc=html_toc, meta=meta, nextLink=nextLink,
+                    tocUrl=tocUrl, cssLink=cssLink)
+            with open(output_file, 'wb') as f:
+                f.write(t)
+
+            with CurrentDir(output_dir):
+                for item in oeb_book.manifest:
+                    path = abspath(unquote(item.href))
+                    item_dir = dirname(path)
+                    if not exists(item_dir):
+                        os.makedirs(item_dir)
+                    if item.spine_position is not None:
+                        # Spine items are rendered below; only create an
+                        # empty placeholder file here.
+                        with open(path, 'wb') as f:
+                            pass
+                    else:
+                        with open(path, 'wb') as f:
+                            f.write(str(item))
+                        item.unload_data_from_memory(memory=path)
+
+                # The page template is the same for every spine item, so it
+                # is compiled once.
+                page_template = Templite(template_html_data)
+
+                for item in oeb_book.spine:
+                    path = abspath(unquote(item.href))
+                    item_dir = dirname(path)
+                    root = item.data.getroottree()
+
+                    # get & clean HTML <HEAD>-data
+                    head = root.xpath('//h:head', namespaces=xhtml_ns)[0]
+                    head_content = etree.tostring(head, pretty_print=True, encoding='utf-8')
+                    head_content = head_tag_re.sub('', head_content)
+                    head_content = style_re.sub('', head_content)
+
+                    # get & clean HTML <BODY>-data
+                    body = root.xpath('//h:body', namespaces=xhtml_ns)[0]
+                    ebook_content = etree.tostring(body, pretty_print=True, encoding='utf-8')
+                    ebook_content = body_tag_re.sub('', ebook_content)
+
+                    # generate link to next page
+                    if item.spine_position+1 < len(oeb_book.spine):
+                        nextLink = oeb_book.spine[item.spine_position+1].href
+                        nextLink = relpath(abspath(nextLink), item_dir)
+                    else:
+                        nextLink = None
+
+                    # generate link to previous page
+                    if item.spine_position > 0:
+                        prevLink = oeb_book.spine[item.spine_position-1].href
+                        prevLink = relpath(abspath(prevLink), item_dir)
+                    else:
+                        prevLink = None
+
+                    cssLink = relpath(abspath(css_path), item_dir)
+                    tocUrl = relpath(output_file, item_dir)
+
+                    # render template; the TOC is passed as a callable so it
+                    # is only generated if the template actually calls it
+                    toc = lambda: self.generate_html_toc(oeb_book, path, output_dir)
+                    t = page_template.render(ebookContent=ebook_content,
+                            prevLink=prevLink, nextLink=nextLink,
+                            has_toc=has_toc, toc=toc,
+                            tocUrl=tocUrl, head_content=head_content,
+                            meta=meta, cssLink=cssLink)
+
+                    # write html to file
+                    with open(path, 'wb') as f:
+                        f.write(t)
+                    item.unload_data_from_memory(memory=path)
+
+            zfile = ZipFile(output_path, "w")
+            try:
+                # All generated links assume the layout "<name>.html" next
+                # to "<name>_files/", so the directory is stored under its
+                # own name and the index page is stored beside it.
+                # NOTE(review): relies on calibre's ZipFile.add_dir taking
+                # an archive prefix as second argument - confirm.
+                zfile.add_dir(output_dir, os.path.basename(output_dir))
+                zfile.write(output_file, os.path.basename(output_file))
+
+                if opts.extract_to:
+                    # Any existing directory at extract_to is replaced.
+                    if os.path.exists(opts.extract_to):
+                        shutil.rmtree(opts.extract_to)
+                    os.makedirs(opts.extract_to)
+                    zfile.extractall(opts.extract_to)
+                    self.log('Zip file extracted to', opts.extract_to)
+            finally:
+                zfile.close()
+        finally:
+            # cleanup temp dir, also when the conversion failed
+            shutil.rmtree(tempdir)
+
+
--- a/src/calibre/ebooks/oeb/output.py
+++ b/src/calibre/ebooks/oeb/output.py
@ -49,5 +49,3 @@ class OEBOutput(OutputFormatPlugin):
                with open(path, 'wb') as f:
                    f.write(str(item))
                item.unload_data_from_memory(memory=path)
-
-
--- a/src/templite/__init__.py
+++ b/src/templite/__init__.py
@ -0,0 +1,87 @@
+#!/usr/bin/env python
+#
+#       Templite+
+#       A light-weight, fully functional, general purpose templating engine
+#
+#       Copyright (c) 2009 joonis new media
+#       Author: Thimo Kraemer <thimo.kraemer@joonis.de>
+#
+#       Based on Templite - Tomer Filiba
+#       http://code.activestate.com/recipes/496702/
+#
+#       This program is free software; you can redistribute it and/or modify
+#       it under the terms of the GNU General Public License as published by
+#       the Free Software Foundation; either version 2 of the License, or
+#       (at your option) any later version.
+#
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#       GNU General Public License for more details.
+#
+#       You should have received a copy of the GNU General Public License
+#       along with this program; if not, write to the Free Software
+#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+#       MA 02110-1301, USA.
+#
+
+import sys, re
+
+class Templite(object):
+    """
+    Light-weight template engine that compiles a text template with
+    embedded Python code into a code object.
+
+    Text outside the delimiters is emitted verbatim. Text inside the
+    delimiters is Python code: a part ending with ':' opens a block, a part
+    starting with ':' closes the current block (and opens a new one when it
+    also ends with ':', e.g. ':else:'). A part that is a plain name, index
+    expression or string literal is emitted automatically.
+    """
+    # Code parts matching this pattern are wrapped in emit(...): parts that
+    # start with a quote, or that consist only of name/index/quote characters.
+    auto_emit = re.compile(r'(^[\'"])|(^[a-zA-Z0-9_\[\]\'"]+$)')
+
+    def __init__(self, template, start='${', end='}$'):
+        """
+        Compile the template.
+        template - the template source text
+        start, end - opening and closing delimiter, two characters each
+
+        Raises ValueError for delimiters of the wrong length and SyntaxError
+        for unbalanced block statements.
+        """
+        if len(start) != 2 or len(end) != 2:
+            raise ValueError('each delimiter must be two characters long')
+        delimiter = re.compile('%s(.*?)%s' % (re.escape(start), re.escape(end)), re.DOTALL)
+        offset = 0  # current block nesting depth (one tab per level)
+        tokens = []
+        # split() with one capture group alternates literal text (even
+        # indexes) and code (odd indexes).
+        for i, part in enumerate(delimiter.split(template)):
+            # A delimiter with a backslash between its two characters is an
+            # escaped delimiter and is turned back into the plain delimiter.
+            part = part.replace('\\'.join(list(start)), start)
+            part = part.replace('\\'.join(list(end)), end)
+            if i % 2 == 0:
+                if not part: continue
+                # Literal text becomes a triple-quoted emit() call, so
+                # backslashes and double quotes must be escaped.
+                part = part.replace('\\', '\\\\').replace('"', '\\"')
+                part = '\t' * offset + 'emit("""%s""")' % part
+            else:
+                part = part.rstrip()
+                if not part: continue
+                if part.lstrip().startswith(':'):
+                    if not offset:
+                        raise SyntaxError('no block statement to terminate: ${%s}$' % part)
+                    offset -= 1
+                    part = part.lstrip()[1:]
+                    # A pure terminator such as ':endif' produces no code.
+                    if not part.endswith(':'): continue
+                elif self.auto_emit.match(part.lstrip()):
+                    part = 'emit(%s)' % part.lstrip()
+                # Strip the common left margin and re-indent the code to the
+                # current block depth.
+                lines = part.splitlines()
+                margin = min(len(l) - len(l.lstrip()) for l in lines if l.strip())
+                part = '\n'.join('\t' * offset + l[margin:] for l in lines)
+                if part.endswith(':'):
+                    offset += 1
+            tokens.append(part)
+        if offset:
+            raise SyntaxError('%i block statement(s) not terminated' % offset)
+        self.__code = compile('\n'.join(tokens), '<templite %r>' % template[:20], 'exec')
+
+    def render(self, __namespace=None, **kw):
+        """
+        renders the template according to the given namespace.
+        __namespace - a dictionary serving as a namespace for evaluation
+        **kw - keyword arguments which are added to the namespace
+
+        Returns the rendered text. While the template code runs, sys.stdout
+        is redirected to this object so that print output becomes part of
+        the result; the previous sys.stdout is restored afterwards, also
+        when the template code raises.
+        """
+        namespace = {}
+        if __namespace: namespace.update(__namespace)
+        if kw: namespace.update(kw)
+        namespace['emit'] = self.write
+
+        saved_stdout = sys.stdout
+        sys.stdout = self
+        self.__output = []
+        try:
+            eval(self.__code, namespace)
+        finally:
+            # Without this a failing template would leave sys.stdout
+            # pointing at this object for the rest of the process.
+            sys.stdout = saved_stdout
+        return ''.join(self.__output)
+
+    def write(self, *args):
+        """
+        Append the string form of every argument to the current output.
+        Used as emit() inside templates and as the sys.stdout replacement.
+        """
+        for a in args:
+            try:
+                text = str(a)
+            except UnicodeEncodeError:
+                # Python 2: str() of a unicode object containing non-ASCII
+                # characters fails; store it UTF-8 encoded instead.
+                if not hasattr(a, 'encode'):
+                    raise
+                text = a.encode('utf-8')
+            self.__output.append(text)