mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Merge from main branch
This commit is contained in:
commit
9c1a8a3b77
@ -231,6 +231,17 @@ class HTMLMetadataReader(MetadataReaderPlugin):
|
|||||||
from calibre.ebooks.metadata.html import get_metadata
|
from calibre.ebooks.metadata.html import get_metadata
|
||||||
return get_metadata(stream)
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class HTMLZMetadataReader(MetadataReaderPlugin):
    '''
    Metadata reader plugin registered for files with the htmlz extension.
    The actual work is delegated to calibre.ebooks.metadata.extz.
    '''

    name        = 'Read HTMLZ metadata'
    author      = 'John Schember'
    description = _('Read metadata from %s files') % 'HTMLZ'
    file_types  = set(['htmlz'])

    def get_metadata(self, stream, ftype):
        '''
        Return whatever extz.get_metadata produces for stream.

        @stream: The open HTMLZ file to read from.
        @ftype: The file type; not used by this reader.
        '''
        # The module is imported at call time rather than at class
        # definition time, as the other reader plugins in this file do.
        from calibre.ebooks.metadata import extz
        return extz.get_metadata(stream)
|
||||||
|
|
||||||
class IMPMetadataReader(MetadataReaderPlugin):
|
class IMPMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
name = 'Read IMP metadata'
|
name = 'Read IMP metadata'
|
||||||
@ -407,7 +418,7 @@ class TXTZMetadataReader(MetadataReaderPlugin):
|
|||||||
author = 'John Schember'
|
author = 'John Schember'
|
||||||
|
|
||||||
def get_metadata(self, stream, ftype):
|
def get_metadata(self, stream, ftype):
|
||||||
from calibre.ebooks.metadata.txtz import get_metadata
|
from calibre.ebooks.metadata.extz import get_metadata
|
||||||
return get_metadata(stream)
|
return get_metadata(stream)
|
||||||
|
|
||||||
class ZipMetadataReader(MetadataReaderPlugin):
|
class ZipMetadataReader(MetadataReaderPlugin):
|
||||||
@ -433,6 +444,17 @@ class EPUBMetadataWriter(MetadataWriterPlugin):
|
|||||||
from calibre.ebooks.metadata.epub import set_metadata
|
from calibre.ebooks.metadata.epub import set_metadata
|
||||||
set_metadata(stream, mi, apply_null=self.apply_null)
|
set_metadata(stream, mi, apply_null=self.apply_null)
|
||||||
|
|
||||||
|
class HTMLZMetadataWriter(MetadataWriterPlugin):
    '''
    Metadata writer plugin registered for files with the htmlz extension.
    The actual work is delegated to calibre.ebooks.metadata.extz.
    '''

    name        = 'Set HTMLZ metadata'
    author      = 'John Schember'
    description = _('Set metadata from %s files') % 'HTMLZ'
    file_types  = set(['htmlz'])

    def set_metadata(self, stream, mi, type):
        '''
        Write the metadata mi into the HTMLZ file stream.

        @stream: The open HTMLZ file to update.
        @mi: The metadata to store.
        @type: The file type; not used by this writer. The name shadows
        the builtin but is kept because it is part of the plugin interface.
        '''
        # Imported at call time, like the other writer plugins in this file.
        from calibre.ebooks.metadata import extz
        extz.set_metadata(stream, mi)
|
||||||
|
|
||||||
class LRFMetadataWriter(MetadataWriterPlugin):
|
class LRFMetadataWriter(MetadataWriterPlugin):
|
||||||
|
|
||||||
name = 'Set LRF metadata'
|
name = 'Set LRF metadata'
|
||||||
@ -505,7 +527,7 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
|
|||||||
author = 'John Schember'
|
author = 'John Schember'
|
||||||
|
|
||||||
def set_metadata(self, stream, mi, type):
|
def set_metadata(self, stream, mi, type):
|
||||||
from calibre.ebooks.metadata.txtz import set_metadata
|
from calibre.ebooks.metadata.extz import set_metadata
|
||||||
set_metadata(stream, mi)
|
set_metadata(stream, mi)
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
@ -514,6 +536,7 @@ from calibre.ebooks.comic.input import ComicInput
|
|||||||
from calibre.ebooks.epub.input import EPUBInput
|
from calibre.ebooks.epub.input import EPUBInput
|
||||||
from calibre.ebooks.fb2.input import FB2Input
|
from calibre.ebooks.fb2.input import FB2Input
|
||||||
from calibre.ebooks.html.input import HTMLInput
|
from calibre.ebooks.html.input import HTMLInput
|
||||||
|
from calibre.ebooks.htmlz.input import HTMLZInput
|
||||||
from calibre.ebooks.lit.input import LITInput
|
from calibre.ebooks.lit.input import LITInput
|
||||||
from calibre.ebooks.mobi.input import MOBIInput
|
from calibre.ebooks.mobi.input import MOBIInput
|
||||||
from calibre.ebooks.odt.input import ODTInput
|
from calibre.ebooks.odt.input import ODTInput
|
||||||
@ -544,6 +567,7 @@ from calibre.ebooks.tcr.output import TCROutput
|
|||||||
from calibre.ebooks.txt.output import TXTOutput
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
from calibre.ebooks.txt.output import TXTZOutput
|
from calibre.ebooks.txt.output import TXTZOutput
|
||||||
from calibre.ebooks.html.output import HTMLOutput
|
from calibre.ebooks.html.output import HTMLOutput
|
||||||
|
from calibre.ebooks.htmlz.output import HTMLZOutput
|
||||||
from calibre.ebooks.snb.output import SNBOutput
|
from calibre.ebooks.snb.output import SNBOutput
|
||||||
|
|
||||||
from calibre.customize.profiles import input_profiles, output_profiles
|
from calibre.customize.profiles import input_profiles, output_profiles
|
||||||
@ -599,6 +623,7 @@ plugins += [
|
|||||||
EPUBInput,
|
EPUBInput,
|
||||||
FB2Input,
|
FB2Input,
|
||||||
HTMLInput,
|
HTMLInput,
|
||||||
|
HTMLZInput,
|
||||||
LITInput,
|
LITInput,
|
||||||
MOBIInput,
|
MOBIInput,
|
||||||
ODTInput,
|
ODTInput,
|
||||||
@ -630,6 +655,7 @@ plugins += [
|
|||||||
TXTOutput,
|
TXTOutput,
|
||||||
TXTZOutput,
|
TXTZOutput,
|
||||||
HTMLOutput,
|
HTMLOutput,
|
||||||
|
HTMLZOutput,
|
||||||
SNBOutput,
|
SNBOutput,
|
||||||
]
|
]
|
||||||
# Order here matters. The first matched device is the one used.
|
# Order here matters. The first matched device is the one used.
|
||||||
|
@ -100,6 +100,12 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
|
|||||||
try:
|
try:
|
||||||
if encoding.lower().strip() == 'macintosh':
|
if encoding.lower().strip() == 'macintosh':
|
||||||
encoding = 'mac-roman'
|
encoding = 'mac-roman'
|
||||||
|
if encoding.lower().replace('_', '-').strip() in (
|
||||||
|
'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
|
||||||
|
'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
|
||||||
|
# Microsoft Word exports to HTML with encoding incorrectly set to
|
||||||
|
# gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
|
||||||
|
encoding = 'gbk'
|
||||||
raw = raw.decode(encoding, 'replace')
|
raw = raw.decode(encoding, 'replace')
|
||||||
except LookupError:
|
except LookupError:
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
@ -110,11 +116,6 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
|
|||||||
if resolve_entities:
|
if resolve_entities:
|
||||||
raw = substitute_entites(raw)
|
raw = substitute_entites(raw)
|
||||||
|
|
||||||
if encoding and encoding.lower().replace('_', '-').strip() in (
|
|
||||||
'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
|
|
||||||
'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
|
|
||||||
# Microsoft Word exports to HTML with encoding incorrectly set to
|
|
||||||
# gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
|
|
||||||
encoding = 'gbk'
|
|
||||||
|
|
||||||
return raw, encoding
|
return raw, encoding
|
||||||
|
0
src/calibre/ebooks/htmlz/__init__.py
Normal file
0
src/calibre/ebooks/htmlz/__init__.py
Normal file
66
src/calibre/ebooks/htmlz/input.py
Normal file
66
src/calibre/ebooks/htmlz/input.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from calibre import walk
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
from calibre.utils.zipfile import ZipFile
|
||||||
|
|
||||||
|
class HTMLZInput(InputFormatPlugin):
    '''
    Input plugin for HTMLZ archives.

    The archive is unpacked into the current working directory, the first
    HTML file found in it is run through the HTML input plugin and the
    metadata read from the archive is applied to the resulting OEB book.
    '''

    # NOTE(review): 'HTLZ' looks like a typo for 'HTMLZ'. It is left as is
    # because the plugin name may be used as an identifier elsewhere -
    # confirm before renaming.
    name        = 'HTLZ Input'
    author      = 'John Schember'
    description = 'Convert HTML files to HTML'
    file_types  = set(['htmlz'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the HTMLZ archive in stream into an OEB book.

        @stream: The open HTMLZ (zip) file.
        @options: The conversion options. The recommended values of the
        HTML input plugin are copied onto it and input_encoding is set to
        utf-8.
        @file_ext: The extension used to look up the metadata reader.
        @log: The logger handed on to the HTML input plugin.
        @accelerators: Not used by this plugin.

        Returns the OEB book produced by the HTML input plugin.
        '''
        self.log = log
        html = u''

        # Remember the encoding the user asked for before the options are
        # overwritten below.
        ienc = getattr(options, 'input_encoding', None) or 'utf-8'

        # Extract content from zip archive.
        zf = ZipFile(stream)
        zf.extractall('.')

        # Only the first HTML file found is used.
        for x in walk('.'):
            if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'):
                with open(x, 'rb') as tf:
                    html = tf.read()
                break

        # The file was read as bytes. Decode it explicitly instead of
        # relying on the implicit default-codec decode that calling
        # .encode() on a byte string triggers.
        if isinstance(html, bytes):
            try:
                html = html.decode(ienc, 'replace')
            except LookupError:
                # Unknown encoding name supplied by the user.
                html = html.decode('utf-8', 'replace')

        # Run the HTML through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(options, opt.option.name, opt.recommended_value)
        options.input_encoding = 'utf-8'

        # Pick a file name that does not clash with anything extracted
        # from the archive. Every candidate lives in the same directory.
        base = os.getcwdu()
        fname = os.path.join(base, 'index.html')
        c = 0
        while os.path.exists(fname):
            c += 1
            fname = os.path.join(base, 'index%d.html' % c)
        with open(fname, 'wb') as htmlfile:
            htmlfile.write(html.encode('utf-8'))

        # debug_pipeline is switched off for the nested conversion and
        # restored afterwards, even if that conversion fails.
        odi = options.debug_pipeline
        options.debug_pipeline = None
        try:
            # Generate oeb from html conversion. The file is closed before
            # it is removed.
            with open(fname, 'rb') as hf:
                oeb = html_input.convert(hf, options, 'html', log, {})
        finally:
            options.debug_pipeline = odi
        os.remove(fname)

        # Set metadata from file.
        from calibre.customize.ui import get_file_type_metadata
        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
        mi = get_file_type_metadata(stream, file_ext)
        meta_info_to_oeb_metadata(mi, oeb.metadata, log)

        return oeb
|
372
src/calibre/ebooks/htmlz/oeb2html.py
Normal file
372
src/calibre/ebooks/htmlz/oeb2html.py
Normal file
@ -0,0 +1,372 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Transform OEB content into a single (more or less) HTML file.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from urlparse import urlparse
|
||||||
|
|
||||||
|
from calibre import prepare_string_for_xml
|
||||||
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
||||||
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
|
from calibre.utils.logging import default_log
|
||||||
|
|
||||||
|
class OEB2HTML(object):
    '''
    Base class. All subclasses should implement dump_text to actually transform
    content. Also, callers should use oeb2html to get the transformed html.
    links and images can be retrieved after calling oeb2html to get the mapping
    of OEB links and images to the new names used in the html returned by oeb2html.
    Images will always be referenced as if they are in an images directory.

    Use get_css to get the CSS classes for the OEB document as a string.
    '''

    def __init__(self, log=None):
        '''
        @log: The logger to use; default_log is used when it is None.
        '''
        self.log = default_log if log is None else log
        # Maps 'href#id' to the generated 'calibre_link-N' anchor name.
        self.links = {}
        # Maps an absolute image href to its file name under images/.
        self.images = {}

    def oeb2html(self, oeb_book, opts):
        '''
        Convert oeb_book to a single html string.

        The links and images mappings are reset on every call.
        '''
        self.log.info('Converting OEB book to HTML...')
        self.opts = opts
        self.links = {}
        self.images = {}

        return self.mlize_spine(oeb_book)

    def mlize_spine(self, oeb_book):
        '''
        Run dump_text over the body of every spine item and wrap the
        result in a minimal html document.
        '''
        # The head has to come before the body, not inside it.
        output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /></head><body>']
        for item in oeb_book.spine:
            self.log.debug('Converting %s to HTML...' % item.href)
            stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
            output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
            output.append('\n\n')
        output.append('</body></html>')
        return ''.join(output)

    def dump_text(self, elem, stylizer, page):
        '''
        Turn elem into a list of html string fragments. Must be
        implemented by subclasses.
        '''
        raise NotImplementedError

    def get_link_id(self, href, aid):
        '''
        Return the anchor name used in the output for the id aid in the
        file href, allocating a new one on first use.
        '''
        aid = '%s#%s' % (href, aid)
        if aid not in self.links:
            self.links[aid] = 'calibre_link-%s' % len(self.links)
        return self.links[aid]

    def rewrite_links(self, tag, attribs, page):
        '''
        Rewrite the id and, for a tags, the href in attribs so that they
        use the generated anchor names. attribs is modified in place and
        returned.
        '''
        # Rewrite ids.
        if 'id' in attribs:
            attribs['id'] = self.get_link_id(page.href, attribs['id'])
        # Rewrite links.
        if tag == 'a':
            # Anchors used only as link targets have no href at all.
            href = attribs.get('href')
            if href is not None:
                href = page.abshref(href)
                if self.url_is_relative(href):
                    # A link to a whole file points at the anchor that is
                    # registered for that file with an empty id.
                    if '#' not in href:
                        href += '#'
                    if href not in self.links:
                        self.links[href] = 'calibre_link-%s' % len(self.links)
                    href = '#%s' % self.links[href]
                attribs['href'] = href
        return attribs

    def rewrite_images(self, tag, attribs, page):
        '''
        Point the src of img tags at images/<generated name>. attribs is
        modified in place and returned.
        '''
        if tag == 'img':
            src = attribs.get('src', None)
            if src:
                src = page.abshref(src)
                if src not in self.images:
                    # Images are numbered in the order they are first seen
                    # and zero padded to a width of ten characters.
                    ext = os.path.splitext(src)[1]
                    fname = '%s%s' % (len(self.images), ext)
                    self.images[src] = fname.zfill(10)
                attribs['src'] = 'images/%s' % self.images[src]
        return attribs

    def url_is_relative(self, url):
        '''
        Return True if url has no scheme.
        '''
        return not urlparse(url).scheme

    def get_css(self, oeb_book):
        '''
        Return the cssText of the first text/css item in the manifest, or
        an empty string when there is none.
        '''
        for item in oeb_book.manifest:
            if item.media_type == 'text/css':
                return item.data.cssText
        return u''
|
||||||
|
|
||||||
|
|
||||||
|
class OEB2HTMLNoCSSizer(OEB2HTML):
    '''
    This will remap a small number of CSS styles to equivalent HTML tags.
    '''

    def dump_text(self, elem, stylizer, page):
        '''
        Return elem and everything below it as a list of html fragments.

        @elem: The element in the etree that we are working on.
        @stylizer: The style information attached to the element.
        @page: The spine item elem belongs to. Used to resolve links.
        '''

        # We can only process tags. If there isn't a tag return any text.
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            p = elem.getparent()
            if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS \
                    and elem.tail:
                return [prepare_string_for_xml(elem.tail)]
            return ['']

        # Setup our variables.
        text = ['']
        style = stylizer.style(elem)
        tags = []
        tag = barename(elem.tag)
        # NOTE: this is the attribute mapping of the element itself, so the
        # changes made below are applied to the source tree as well.
        attribs = elem.attrib

        # Every body becomes a div that carries the anchor for its file.
        if tag == 'body':
            tag = 'div'
            attribs['id'] = self.get_link_id(page.href, '')
        tags.append(tag)

        # Ignore anything that is set to not be displayed. The tail is not
        # part of the hidden element, so it is still written.
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            if elem.tail:
                return [prepare_string_for_xml(elem.tail)]
            return ['']

        # Remove attributes we won't want.
        if 'class' in attribs:
            del attribs['class']
        if 'style' in attribs:
            del attribs['style']

        attribs = self.rewrite_links(tag, attribs, page)
        attribs = self.rewrite_images(tag, attribs, page)

        # Turn the rest of the attributes into a string we can write with the tag.
        at = ''.join([' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
                      for k, v in attribs.items()])

        # Write the tag.
        text.append('<%s%s>' % (tag, at))

        # Turn styles into tags.
        if style['font-weight'] in ('bold', 'bolder'):
            text.append('<b>')
            tags.append('b')
        if style['font-style'] == 'italic':
            text.append('<i>')
            tags.append('i')
        if style['text-decoration'] == 'underline':
            text.append('<u>')
            tags.append('u')
        if style['text-decoration'] == 'line-through':
            text.append('<s>')
            tags.append('s')

        # Process tags that contain text. The text has to be escaped, it
        # is written straight into the markup.
        if hasattr(elem, 'text') and elem.text:
            text.append(prepare_string_for_xml(elem.text))

        # Recurse down into tags within the tag we are in.
        for item in elem:
            text += self.dump_text(item, stylizer, page)

        # Close all open tags, innermost first.
        tags.reverse()
        for t in tags:
            text.append('</%s>' % t)

        # Add the text that is outside of the tag.
        if hasattr(elem, 'tail') and elem.tail:
            text.append(prepare_string_for_xml(elem.tail))

        return text
|
||||||
|
|
||||||
|
|
||||||
|
class OEB2HTMLInlineCSSizer(OEB2HTML):
    '''
    Turns external CSS classes into inline style attributes.
    '''

    def dump_text(self, elem, stylizer, page):
        '''
        Return elem and everything below it as a list of html fragments.

        @elem: The element in the etree that we are working on.
        @stylizer: The style information attached to the element.
        @page: The spine item elem belongs to. Used to resolve links.
        '''

        # We can only process tags. If there isn't a tag return any text.
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            p = elem.getparent()
            if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS \
                    and elem.tail:
                return [prepare_string_for_xml(elem.tail)]
            return ['']

        # Setup our variables.
        text = ['']
        style = stylizer.style(elem)
        tags = []
        tag = barename(elem.tag)
        # NOTE: this is the attribute mapping of the element itself, so the
        # changes made below are applied to the source tree as well.
        attribs = elem.attrib

        # The string form of the style is what ends up in the attribute.
        style_a = '%s' % style
        # Every body becomes a div that carries the anchor for its file.
        if tag == 'body':
            tag = 'div'
            attribs['id'] = self.get_link_id(page.href, '')
            if not style['page-break-before'] == 'always':
                # Prepend the page break to the existing style. The
                # parentheses matter: without them the conditional
                # expression swallows the whole right hand side and the
                # existing style is lost.
                style_a = 'page-break-before: always;' + (' ' if style_a else '') + style_a
        tags.append(tag)

        # Remove attributes we won't want.
        if 'class' in attribs:
            del attribs['class']
        if 'style' in attribs:
            del attribs['style']

        attribs = self.rewrite_links(tag, attribs, page)
        attribs = self.rewrite_images(tag, attribs, page)

        # Turn the rest of the attributes into a string we can write with the tag.
        at = ''.join([' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
                      for k, v in attribs.items()])

        # Turn style into strings for putting in the tag. It is escaped
        # like every other attribute value, styles can contain quotes.
        style_t = ''
        if style_a:
            style_t = ' style="%s"' % prepare_string_for_xml(style_a, attribute=True)

        # Write the tag.
        text.append('<%s%s%s>' % (tag, at, style_t))

        # Process tags that contain text. The text has to be escaped, it
        # is written straight into the markup.
        if hasattr(elem, 'text') and elem.text:
            text.append(prepare_string_for_xml(elem.text))

        # Recurse down into tags within the tag we are in.
        for item in elem:
            text += self.dump_text(item, stylizer, page)

        # Close all open tags.
        tags.reverse()
        for t in tags:
            text.append('</%s>' % t)

        # Add the text that is outside of the tag.
        if hasattr(elem, 'tail') and elem.tail:
            text.append(prepare_string_for_xml(elem.tail))

        return text
|
||||||
|
|
||||||
|
|
||||||
|
class OEB2HTMLClassCSSizer(OEB2HTML):
    '''
    Use CSS classes. css_style option can specify whether to use
    inline classes (style tag in the head) or reference an external
    CSS file called style.css.
    '''

    def mlize_spine(self, oeb_book):
        '''
        Run dump_text over the body of every spine item and wrap the
        result in an html document whose head either links to style.css
        or embeds the CSS, depending on opts.htmlz_class_style.
        '''
        output = []
        for item in oeb_book.spine:
            self.log.debug('Converting %s to HTML...' % item.href)
            stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
            output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
            output.append('\n\n')
        if self.opts.htmlz_class_style == 'external':
            css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
        else:
            css = u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>'
        output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />'] + [css] + [u'</head><body>'] + output + [u'</body></html>']
        return ''.join(output)

    def dump_text(self, elem, stylizer, page):
        '''
        Return elem and everything below it as a list of html fragments.
        The class attributes are kept, only inline styles are removed.

        @elem: The element in the etree that we are working on.
        @stylizer: The style information attached to the element. Not
        consulted by this converter.
        @page: The spine item elem belongs to. Used to resolve links.
        '''

        # We can only process tags. If there isn't a tag return any text.
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            p = elem.getparent()
            if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS \
                    and elem.tail:
                return [prepare_string_for_xml(elem.tail)]
            return ['']

        # Setup our variables.
        text = ['']
        tags = []
        tag = barename(elem.tag)
        # NOTE: this is the attribute mapping of the element itself, so the
        # changes made below are applied to the source tree as well.
        attribs = elem.attrib

        # Every body becomes a div that carries the anchor for its file.
        if tag == 'body':
            tag = 'div'
            attribs['id'] = self.get_link_id(page.href, '')
        tags.append(tag)

        # Remove attributes we won't want.
        if 'style' in attribs:
            del attribs['style']

        attribs = self.rewrite_links(tag, attribs, page)
        attribs = self.rewrite_images(tag, attribs, page)

        # Turn the rest of the attributes into a string we can write with the tag.
        at = ''.join([' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
                      for k, v in attribs.items()])

        # Write the tag.
        text.append('<%s%s>' % (tag, at))

        # Process tags that contain text. The text has to be escaped, it
        # is written straight into the markup.
        if hasattr(elem, 'text') and elem.text:
            text.append(prepare_string_for_xml(elem.text))

        # Recurse down into tags within the tag we are in.
        for item in elem:
            text += self.dump_text(item, stylizer, page)

        # Close all open tags.
        tags.reverse()
        for t in tags:
            text.append('</%s>' % t)

        # Add the text that is outside of the tag.
        if hasattr(elem, 'tail') and elem.tail:
            text.append(prepare_string_for_xml(elem.tail))

        return text
|
||||||
|
|
||||||
|
|
||||||
|
def oeb2html_no_css(oeb_book, log, opts):
    '''
    Convert oeb_book with OEB2HTMLNoCSSizer.

    Returns a (html, images) tuple, images being the image mapping the
    converter built during the conversion.
    '''
    converter = OEB2HTMLNoCSSizer(log)
    # The tuple is evaluated left to right, so the image mapping is read
    # only after the conversion has filled it.
    return (converter.oeb2html(oeb_book, opts), converter.images)
|
||||||
|
|
||||||
|
def oeb2html_inline_css(oeb_book, log, opts):
    '''
    Convert oeb_book with OEB2HTMLInlineCSSizer.

    Returns a (html, images) tuple, images being the image mapping the
    converter built during the conversion.
    '''
    converter = OEB2HTMLInlineCSSizer(log)
    # The tuple is evaluated left to right, so the image mapping is read
    # only after the conversion has filled it.
    return (converter.oeb2html(oeb_book, opts), converter.images)
|
||||||
|
|
||||||
|
def oeb2html_class_css(oeb_book, log, opts):
    '''
    Convert oeb_book with OEB2HTMLClassCSSizer, with the CSS embedded in
    the head of the document.

    Note that opts is modified: htmlz_class_style is set to 'inline'.

    Returns a (html, images) tuple, images being the image mapping the
    converter built during the conversion.
    '''
    izer = OEB2HTMLClassCSSizer(log)
    # The converter reads opts.htmlz_class_style, so that is the option
    # that has to be set for the CSS to be embedded.
    setattr(opts, 'htmlz_class_style', 'inline')
    html = izer.oeb2html(oeb_book, opts)
    images = izer.images
    return (html, images)
|
83
src/calibre/ebooks/htmlz/output.py
Normal file
83
src/calibre/ebooks/htmlz/output.py
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import, print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre.customize.conversion import OutputFormatPlugin, \
|
||||||
|
OptionRecommendation
|
||||||
|
from calibre.ebooks.oeb.base import OEB_IMAGES
|
||||||
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from calibre.utils.zipfile import ZipFile
|
||||||
|
|
||||||
|
class HTMLZOutput(OutputFormatPlugin):
    '''
    Output plugin that writes the book as an HTMLZ file: a zip archive
    holding index.html, optionally style.css, an images directory and
    metadata.opf.
    '''

    name = 'HTMLZ Output'
    author = 'John Schember'
    file_type = 'htmlz'

    options = set([
        OptionRecommendation(name='htmlz_css_type', recommended_value='class',
            level=OptionRecommendation.LOW,
            choices=['class', 'inline', 'tag'],
            help=_('Specify the handling of CSS. Default is class.\n'
                   'class: Use CSS classes and have elements reference them.\n'
                   'inline: Write the CSS as an inline style attribute.\n'
                   'tag: Turn as many CSS styles as possible into HTML tags.'
            )),
        OptionRecommendation(name='htmlz_class_style', recommended_value='external',
            level=OptionRecommendation.LOW,
            choices=['external', 'inline'],
            help=_('How to handle the CSS when using css-type = \'class\'.\n'
                   'Default is external.\n'
                   'external: Use an external CSS file that is linked in the document.\n'
                   'inline: Place the CSS in the head section of the document.'
            )),
        ])

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write oeb_book to output_path as an HTMLZ file.

        @oeb_book: The book to convert.
        @output_path: Where the archive is written.
        @input_plugin: Not used by this plugin.
        @opts: The conversion options; htmlz_css_type selects the
        converter and htmlz_class_style decides whether style.css is
        written.
        @log: The logger handed on to the converter.
        '''
        # HTML
        if opts.htmlz_css_type == 'inline':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == 'tag':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

        with TemporaryDirectory('_htmlz_output') as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)

            # The files are opened in binary mode and the document
            # declares utf-8, so text is encoded explicitly.
            if not isinstance(html, bytes):
                html = html.encode('utf-8')
            with open(os.path.join(tdir, 'index.html'), 'wb') as tf:
                tf.write(html)

            # CSS
            if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
                css = htmlizer.get_css(oeb_book)
                if not isinstance(css, bytes):
                    css = css.encode('utf-8')
                with open(os.path.join(tdir, 'style.css'), 'wb') as tf:
                    tf.write(css)

            # Images
            images = htmlizer.images
            if images:
                if not os.path.exists(os.path.join(tdir, 'images')):
                    os.makedirs(os.path.join(tdir, 'images'))
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        fname = os.path.join(tdir, 'images', images[item.href])
                        with open(fname, 'wb') as img:
                            img.write(item.data)

            # Metadata
            with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
                mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))

            # The archive has to be built while tdir still exists and has
            # to be closed so that it is completely written out.
            htmlz = ZipFile(output_path, 'w')
            try:
                htmlz.add_dir(tdir)
            finally:
                htmlz.close()
|
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
Read meta information from TXT files
|
Read meta information from extZ (TXTZ, HTMLZ...) files.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import os
|
import os
|
@ -20,7 +20,8 @@ class RemoveAdobeMargins(object):
|
|||||||
self.oeb, self.opts, self.log = oeb, opts, log
|
self.oeb, self.opts, self.log = oeb, opts, log
|
||||||
|
|
||||||
for item in self.oeb.manifest:
|
for item in self.oeb.manifest:
|
||||||
if item.media_type == 'application/vnd.adobe-page-template+xml':
|
if item.media_type in ('application/vnd.adobe-page-template+xml',
|
||||||
|
'application/vnd.adobe.page-template+xml'):
|
||||||
self.log('Removing page margins specified in the'
|
self.log('Removing page margins specified in the'
|
||||||
' Adobe page template')
|
' Adobe page template')
|
||||||
for elem in item.data.xpath(
|
for elem in item.data.xpath(
|
||||||
@ -35,7 +36,7 @@ class RemoveFakeMargins(object):
|
|||||||
|
|
||||||
'''
|
'''
|
||||||
Remove left and right margins from paragraph/divs if the same margin is specified
|
Remove left and right margins from paragraph/divs if the same margin is specified
|
||||||
on almost all the elements of at that level.
|
on almost all the elements at that level.
|
||||||
|
|
||||||
Must be called only after CSS flattening
|
Must be called only after CSS flattening
|
||||||
'''
|
'''
|
||||||
|
@ -270,6 +270,8 @@ class BookInfo(QWebView):
|
|||||||
<style type="text/css">
|
<style type="text/css">
|
||||||
body, td {background-color: transparent; font-size: %dpx; color: %s }
|
body, td {background-color: transparent; font-size: %dpx; color: %s }
|
||||||
a { text-decoration: none; color: blue }
|
a { text-decoration: none; color: blue }
|
||||||
|
div.description { margin-top: 0; padding-top: 0; text-indent: 0 }
|
||||||
|
table { margin-bottom: 0; padding-bottom: 0; }
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
@ -278,9 +280,10 @@ class BookInfo(QWebView):
|
|||||||
<html>
|
<html>
|
||||||
'''%(f, c)
|
'''%(f, c)
|
||||||
if self.vertical:
|
if self.vertical:
|
||||||
|
extra = ''
|
||||||
if comments:
|
if comments:
|
||||||
rows += u'<tr><td colspan="2">%s</td></tr>'%comments
|
extra = u'<div class="description">%s</div>'%comments
|
||||||
self.setHtml(templ%(u'<table>%s</table>'%rows))
|
self.setHtml(templ%(u'<table>%s</table>%s'%(rows, extra)))
|
||||||
else:
|
else:
|
||||||
left_pane = u'<table>%s</table>'%rows
|
left_pane = u'<table>%s</table>'%rows
|
||||||
right_pane = u'<div>%s</div>'%comments
|
right_pane = u'<div>%s</div>'%comments
|
||||||
|
26
src/calibre/gui2/convert/htmlz_output.py
Normal file
26
src/calibre/gui2/convert/htmlz_output.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre.gui2.convert.htmlz_output_ui import Ui_Form
|
||||||
|
from calibre.gui2.convert import Widget
|
||||||
|
|
||||||
|
format_model = None
|
||||||
|
|
||||||
|
class PluginWidget(Widget, Ui_Form):
    '''
    Conversion dialog page for the HTMLZ output options.
    '''

    TITLE = _('HTMLZ Output')
    HELP = _('Options specific to')+' HTMLZ '+_('output')
    COMMIT_NAME = 'htmlz_output'
    ICON = I('mimetypes/html.png')

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        '''
        Fill the two combo boxes with the choices of their options and
        then initialize the option widgets.
        '''
        option_names = ('htmlz_css_type', 'htmlz_class_style')
        Widget.__init__(self, parent, list(option_names))
        self.db = db
        self.book_id = book_id
        # Each option has a combo box named opt_<option name>.
        for opt_name in option_names:
            for choice in get_option(opt_name).option.choices:
                getattr(self, 'opt_' + opt_name).addItem(choice)
        self.initialize_options(get_option, get_help, db, book_id)
|
61
src/calibre/gui2/convert/htmlz_output.ui
Normal file
61
src/calibre/gui2/convert/htmlz_output.ui
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
 <class>Form</class>
 <!-- HTMLZ output options form: two labelled combo boxes (rows 0 and 1) above
      a vertical spacer (row 2). No combo box items are defined in this file. -->
 <widget class="QWidget" name="Form">
  <property name="geometry">
   <rect>
    <x>0</x>
    <y>0</y>
    <width>438</width>
    <height>300</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>Form</string>
  </property>
  <layout class="QGridLayout" name="gridLayout">
   <!-- Row 2: spacer that takes up the remaining vertical space. -->
   <item row="2" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
     </property>
     <property name="sizeHint" stdset="0">
      <size>
       <width>20</width>
       <height>246</height>
      </size>
     </property>
    </spacer>
   </item>
   <!-- Row 0: CSS handling option. -->
   <item row="0" column="0">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>How to handle CSS</string>
     </property>
     <property name="buddy">
      <cstring>opt_htmlz_css_type</cstring>
     </property>
    </widget>
   </item>
   <item row="0" column="1">
    <widget class="QComboBox" name="opt_htmlz_css_type">
     <property name="minimumContentsLength">
      <number>20</number>
     </property>
    </widget>
   </item>
   <!-- Row 1: class based CSS handling option. -->
   <item row="1" column="0">
    <widget class="QLabel" name="label_2">
     <property name="text">
      <string>How to handle class based CSS</string>
     </property>
     <!-- Buddy added so this label is tied to its combo box in the same way
          as the label in row 0. -->
     <property name="buddy">
      <cstring>opt_htmlz_class_style</cstring>
     </property>
    </widget>
   </item>
   <item row="1" column="1">
    <widget class="QComboBox" name="opt_htmlz_class_style"/>
   </item>
  </layout>
 </widget>
 <resources/>
 <connections/>
</ui>
|
@ -136,17 +136,17 @@ class PostInstall:
|
|||||||
self.icon_resources = []
|
self.icon_resources = []
|
||||||
self.menu_resources = []
|
self.menu_resources = []
|
||||||
self.mime_resources = []
|
self.mime_resources = []
|
||||||
if islinux:
|
if islinux or isfreebsd:
|
||||||
self.setup_completion()
|
self.setup_completion()
|
||||||
self.install_man_pages()
|
self.install_man_pages()
|
||||||
if islinux:
|
if islinux or isfreebsd:
|
||||||
self.setup_desktop_integration()
|
self.setup_desktop_integration()
|
||||||
self.create_uninstaller()
|
self.create_uninstaller()
|
||||||
|
|
||||||
from calibre.utils.config import config_dir
|
from calibre.utils.config import config_dir
|
||||||
if os.path.exists(config_dir):
|
if os.path.exists(config_dir):
|
||||||
os.chdir(config_dir)
|
os.chdir(config_dir)
|
||||||
if islinux:
|
if islinux or isfreebsd:
|
||||||
for f in os.listdir('.'):
|
for f in os.listdir('.'):
|
||||||
if os.stat(f).st_uid == 0:
|
if os.stat(f).st_uid == 0:
|
||||||
os.rmdir(f) if os.path.isdir(f) else os.unlink(f)
|
os.rmdir(f) if os.path.isdir(f) else os.unlink(f)
|
||||||
|
@ -40,3 +40,84 @@ Sections
|
|||||||
glossary
|
glossary
|
||||||
|
|
||||||
|
|
||||||
|
The main |app| user interface
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
gui
|
||||||
|
|
||||||
|
Adding your favorite news website to |app|
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
news
|
||||||
|
|
||||||
|
The |app| e-book viewer
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
viewer
|
||||||
|
|
||||||
|
Customizing |app|'s e-book conversion
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
conversion
|
||||||
|
|
||||||
|
Editing e-book metadata
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
metadata
|
||||||
|
|
||||||
|
Frequently Asked Questions
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
faq
|
||||||
|
|
||||||
|
Tutorials
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
tutorials
|
||||||
|
|
||||||
|
Customizing |app|
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
customize
|
||||||
|
|
||||||
|
The Command Line Interface
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
cli/cli-index
|
||||||
|
|
||||||
|
Setting up a |app| development environment
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
develop
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user