Conversion of LRF books. This is very new code and has only been tested with calibre-generated LRF books.

This commit is contained in:
Kovid Goyal 2009-09-16 21:45:09 -06:00
parent 61956a7dfd
commit 8eb15764b0
2 changed files with 498 additions and 23 deletions

View File

@ -1,17 +1,18 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<xsl:stylesheet <xsl:stylesheet
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:opf="http://www.idpf.org/2007/opf"
xmlns:c="calibre" xmlns:c="calibre"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:opf="http://www.idpf.org/2007/opf"
xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata"
extension-element-prefixes="c" extension-element-prefixes="c"
xsl:version = "1.1" xsl:version = "1.1"
> >
<xsl:output method="xml" indent="yes"/> <xsl:output method="xml" indent="yes"/>
<xsl:template match="/"> <xsl:template match="/">
<package version="2.0" <package version="2.0">
unique-identifier="calibre_id"> <metadata>
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf" xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata">
<xsl:call-template name="make-metadata"/> <xsl:call-template name="make-metadata"/>
</metadata> </metadata>
<manifest> <manifest>
@ -21,12 +22,113 @@
<xsl:call-template name="make-spine"/> <xsl:call-template name="make-spine"/>
</spine> </spine>
</package> </package>
<xsl:call-template name="make-ncx"/>
<xsl:call-template name="make-css"/>
<xsl:for-each select="//Page">
<xsl:call-template name="make-page"/>
</xsl:for-each>
</xsl:template>
<!-- make-css: invokes the calibre "styles" extension element once for every
TextStyle and BlockStyle element found anywhere in the input document. The
template itself writes nothing; whatever happens is done by the extension. -->
<xsl:template name="make-css">
<xsl:for-each select="//TextStyle|//BlockStyle">
<c:styles/>
</xsl:for-each>
</xsl:template>
<!-- make-page: writes one XHTML document for the context node. The output
file is named after the context node's objid attribute ("{objid}.xhtml"). -->
<xsl:template name="make-page">
<xsl:variable name="pid" select="@objid"/>
<xsl:document href="{$pid}.xhtml" method="xml" indent="yes">
<html>
<head>
<!-- value-of emits the string value of the first Title element in the document -->
<title><xsl:value-of select="//Title"/></title>
<link rel="stylesheet" type="text/css" href="styles.css"/>
</head>
<body class="body">
<!-- The children of the context node are rendered by the matching templates -->
<xsl:apply-templates />
</body>
</html>
</xsl:document>
</xsl:template>
<!-- RuledLine elements are handed to the calibre "ruled-line" extension element. -->
<xsl:template match="RuledLine">
<c:ruled-line/>
</xsl:template>
<!-- TextBlock elements are handed to the calibre "text-block" extension element. -->
<xsl:template match="TextBlock">
<c:text-block/>
</xsl:template>
<!-- ImageBlock elements are handed to the calibre "image-block" extension element. -->
<xsl:template match="ImageBlock">
<c:image-block/>
</xsl:template>
<xsl:template match="Canvas">
<c:canvas/>
</xsl:template> </xsl:template>
<xsl:template name="make-metadata"> <xsl:template name="make-metadata">
<xsl:for-each select='//BookInformation/Info'> <xsl:for-each select='//BookInformation/Info/BookInfo'>
<c:metadata/> <xsl:apply-templates select="Title"/>
<xsl:apply-templates select="Author"/>
<xsl:apply-templates select="Publisher"/>
<xsl:apply-templates select="Category|Classification"/>
</xsl:for-each> </xsl:for-each>
<xsl:for-each select='//BookInformation/Info/DocInfo'>
<xsl:apply-templates select="Language"/>
<xsl:apply-templates select="Producer"/>
</xsl:for-each>
</xsl:template>
<!-- Title: emits a dc:title element holding the element's text. A non-empty
"reading" attribute is copied to opf:file-as. -->
<xsl:template match="Title">
<xsl:element name="dc:title">
<xsl:if test="@reading and @reading != ''">
<xsl:attribute name="opf:file-as"><xsl:value-of select="@reading"/></xsl:attribute>
</xsl:if>
<xsl:value-of select="."/>
</xsl:element>
</xsl:template>
<!-- Author: emits a dc:creator element with opf:role="aut" holding the
element's text. A non-empty "reading" attribute is copied to opf:file-as. -->
<xsl:template match="Author">
<xsl:element name="dc:creator">
<xsl:attribute name="opf:role">aut</xsl:attribute>
<xsl:if test="@reading and @reading != ''">
<xsl:attribute name="opf:file-as"><xsl:value-of select="@reading"/></xsl:attribute>
</xsl:if>
<xsl:value-of select="."/>
</xsl:element>
</xsl:template>
<!-- Publisher: emits a dc:publisher element holding the element's text. A
non-empty "reading" attribute is copied to opf:file-as. -->
<xsl:template match="Publisher">
<xsl:element name="dc:publisher">
<xsl:if test="@reading and @reading != ''">
<xsl:attribute name="opf:file-as"><xsl:value-of select="@reading"/></xsl:attribute>
</xsl:if>
<xsl:value-of select="."/>
</xsl:element>
</xsl:template>
<!-- Producer: emits a second kind of dc:creator element, distinguished from
the Author one by opf:role="bkp". A non-empty "reading" attribute is copied
to opf:file-as. -->
<xsl:template match="Producer">
<xsl:element name="dc:creator">
<xsl:attribute name="opf:role">bkp</xsl:attribute>
<xsl:if test="@reading and @reading != ''">
<xsl:attribute name="opf:file-as"><xsl:value-of select="@reading"/></xsl:attribute>
</xsl:if>
<xsl:value-of select="."/>
</xsl:element>
</xsl:template>
<!-- Language: emits a dc:language element holding the element's text,
copied verbatim (no normalisation of the language code is done here). -->
<xsl:template match="Language">
<xsl:element name="dc:language">
<xsl:value-of select="."/>
</xsl:element>
</xsl:template>
<xsl:template match="Category|Classification">
<xsl:if test=".!=''">
<xsl:element name="dc:subject">
<xsl:value-of select="."/>
</xsl:element>
</xsl:if>
</xsl:template> </xsl:template>
<xsl:template name="make-manifest"> <xsl:template name="make-manifest">
@ -52,6 +154,7 @@
</xsl:element> </xsl:element>
</xsl:for-each> </xsl:for-each>
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml" /> <item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml" />
<item id="styles" href="styles.css" media-type="text/css" />
</xsl:template> </xsl:template>
@ -72,4 +175,36 @@
<xsl:apply-templates/> <xsl:apply-templates/>
</xsl:template> </xsl:template>
<!-- make-ncx: writes the NCX table of contents to toc.ncx, with one navPoint
per TocLabel found under any TOC element of the input document. Each navPoint
links to "{refpage}.xhtml#{refobj}" and is labelled with the TocLabel text. -->
<xsl:template name="make-ncx">
    <xsl:document href="toc.ncx" method="xml" indent="yes">
        <ncx version="2005-1"
            xmlns="http://www.daisy.org/z3986/2005/ncx/"
            xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata"
        >
            <head>
                <meta name="dtb:uid" content="uid"/>
                <meta name="dtb:depth" content="1"/>
                <meta name="dtb:generator" content="calibre"/>
                <meta name="dtb:totalPageCount" content="0"/>
                <meta name="dtb:maxPageNumber" content="0"/>
            </head>
            <docTitle><text>Table of Contents</text></docTitle>
            <navMap>
                <xsl:for-each select="//TOC/TocLabel">
                    <!-- position() is the 1-based index within the whole
                    selected node set, so it stays unique even when the labels
                    are spread over several TOC elements (a count of preceding
                    siblings restarts for every parent). -->
                    <xsl:variable name="n" select="position()"/>
                    <xsl:element name="navPoint">
                        <!-- id is of type ID and therefore must not start
                        with a digit; hence the textual prefix. -->
                        <xsl:attribute name="id"><xsl:value-of select="concat('navPoint-', $n)"/></xsl:attribute>
                        <xsl:attribute name="playOrder"><xsl:value-of select="$n"/></xsl:attribute>
                        <navLabel><text><xsl:value-of select="."/></text></navLabel>
                        <xsl:element name="content">
                            <xsl:attribute name="src"><xsl:value-of select="concat(@refpage, '.xhtml#', @refobj)"/></xsl:attribute>
                        </xsl:element>
                    </xsl:element>
                </xsl:for-each>
            </navMap>
        </ncx>
    </xsl:document>
</xsl:template>
</xsl:stylesheet> </xsl:stylesheet>

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os import os, textwrap
from copy import deepcopy from copy import deepcopy
from lxml import etree from lxml import etree
@ -14,6 +14,84 @@ from lxml import etree
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin
from calibre import guess_type from calibre import guess_type
class Canvas(etree.XSLTExtension):
    '''
    XSLT extension element that renders ``Canvas`` elements as HTML.

    A canvas that places exactly one object, an ``ImageBlock``, becomes a
    ``<div class="image_page">`` wrapping an ``<img>``. Any other canvas is
    rendered, together with the ``Canvas`` siblings that directly follow it,
    as a ``<table>`` with one ``<tr>`` per canvas and one ``<td>`` per placed
    ``TextBlock``.
    '''

    def __init__(self, doc, styles, text_block, log):
        '''
        :param doc: Parsed input tree in which objects are looked up by objid.
        :param styles: Object whose ``to_num`` converts attribute strings to
                       numbers (returning ``None`` on failure).
        :param text_block: Object whose ``render_block(node, root)`` renders a
                           ``TextBlock`` into an existing element.
        :param log: Logger; only its ``warn`` method is used.
        '''
        # Initialise the lxml base class, as the other extensions here do
        etree.XSLTExtension.__init__(self)
        self.doc = doc
        self.styles = styles
        self.text_block = text_block
        self.log = log
        # objids of canvases already rendered, so that a canvas folded into
        # an earlier table is not rendered a second time
        self.processed = set()

    def execute(self, context, self_node, input_node, output_parent):
        '''
        Render the canvas identified by ``input_node``'s objid into
        ``output_parent``. Canvases without an objid, or whose objid has
        already been handled, produce no output.
        '''
        cid = input_node.get('objid', None)
        if cid is None or cid in self.processed:
            return
        self.processed.add(cid)

        # Work on the element from self.doc rather than on the node handed in
        # by the XSLT engine (presumably because the latter is a restricted,
        # read-only proxy -- see the lxml extension element docs).
        matches = self.doc.xpath('//Canvas[@objid="%s"]'%cid)
        if not matches:
            self.log.warn('Canvas', cid, 'not found in document, skipping')
            return
        input_node = matches[0]

        objects = list(self.get_objects(input_node))
        if len(objects) == 1 and objects[0][0].tag == 'ImageBlock':
            self.image_page(input_node, objects[0][0], output_parent)
            return

        # Gather the run of Canvas elements that directly follow this one;
        # the run ends at the first sibling that is not a Canvas.
        canvases = [input_node]
        for sibling in input_node.itersiblings():
            if sibling.tag != 'Canvas':
                break
            oid = sibling.get('objid', None)
            if oid is not None:
                canvases.append(sibling)
                self.processed.add(oid)

        table = etree.Element('table')
        table.text = '\n\t'
        for canvas in canvases:
            tr = table.makeelement('tr')
            tr.set('id', canvas.get('objid'))
            tr.tail = '\n\t'
            table.append(tr)
            # The placement coordinates are currently not used for layout
            for obj, _x, _y in self.get_objects(canvas):
                if obj.tag != 'TextBlock':
                    self.log.warn(obj.tag, 'elements in Canvas not supported')
                    continue
                td = table.makeelement('td')
                self.text_block.render_block(obj, td)
                tr.append(td)
        output_parent.append(table)

    def image_page(self, input_node, block, output_parent):
        '''
        Append ``<div class="image_page"><img/></div>`` to ``output_parent``.

        :param input_node: Element whose objid becomes the div's id
                           (``'scuzzy'`` when it has none).
        :param block: Element supplying ``xsize``/``ysize`` (image width and
                      height) and ``refstream`` (objid of an ``ImageStream``
                      whose ``file`` attribute becomes the image source).
        :param output_parent: Element that receives the div.
        '''
        div = etree.Element('div')
        div.set('id', input_node.get('objid', 'scuzzy'))
        div.set('class', 'image_page')
        img = div.makeelement('img')

        # Dimensions that are missing or not numeric are simply left out
        width = self.styles.to_num(block.get('xsize', None))
        height = self.styles.to_num(block.get('ysize', None))
        if width is not None:
            img.set('width', str(int(width)))
        if height is not None:
            img.set('height', str(int(height)))

        ref = block.get('refstream', None)
        if ref is not None:
            streams = self.doc.xpath('//ImageStream[@objid="%s"]'%ref)
            if streams:
                src = streams[0].get('file', None)
                if src:
                    img.set('src', src)
        div.append(img)
        output_parent.append(div)

    def get_objects(self, node):
        '''
        Yield ``(element, x, y)`` for every ``PutObj`` descendant of ``node``
        that carries ``refobj``, ``x1`` and ``y1``. ``element`` is the first
        element in the document whose objid equals ``refobj``; placements
        whose target is missing or whose coordinates are not numeric are
        skipped.
        '''
        for put in node.xpath('descendant::PutObj[@refobj and @x1 and @y1]'):
            targets = node.xpath('//*[@objid="%s"]'%put.get('refobj'))
            x = self.styles.to_num(put.get('x1'))
            y = self.styles.to_num(put.get('y1'))
            if targets and x is not None and y is not None:
                yield targets[0], int(x), int(y)
class MediaType(etree.XSLTExtension): class MediaType(etree.XSLTExtension):
def execute(self, context, self_node, input_node, output_parent): def execute(self, context, self_node, input_node, output_parent):
name = input_node.get('file', None) name = input_node.get('file', None)
@ -22,22 +100,254 @@ class MediaType(etree.XSLTExtension):
typ = 'application/octet-stream' typ = 'application/octet-stream'
output_parent.text = typ output_parent.text = typ
class Metadata(etree.XSLTExtension): class ImageBlock(etree.XSLTExtension):
def __init__(self): def __init__(self, canvas):
from calibre.ebooks.oeb.base import DC, OPF, DC11_NS, OPF2_NS etree.XSLTExtension.__init__(self)
self.namespaces = {'dc':DC11_NS, 'opf':OPF2_NS} self.canvas = canvas
self.DC, self.OPF = DC, OPF
print self.namespaces def execute(self, context, self_node, input_node, output_parent):
self.canvas.image_page(input_node, input_node, output_parent)
class RuledLine(etree.XSLTExtension):
    '''XSLT extension element that renders a ``RuledLine`` as ``<hr>``.'''

    def execute(self, context, self_node, input_node, output_parent):
        '''Append an empty ``hr`` element to ``output_parent``.'''
        # None of the input element's attributes are consulted
        output_parent.append(etree.Element('hr'))
class TextBlock(etree.XSLTExtension):
def __init__(self, styles, char_button_map, plot_map, log):
etree.XSLTExtension.__init__(self)
self.styles = styles
self.log = log
self.char_button_map = char_button_map
self.plot_map = plot_map
def execute(self, context, self_node, input_node, output_parent): def execute(self, context, self_node, input_node, output_parent):
input_node = deepcopy(input_node) input_node = deepcopy(input_node)
titles = input_node.xpath('//Info//Title') div = etree.Element('div')
if titles: self.render_block(input_node, div)
tn = etree.Element(self.DC('title'), nsmap=self.namespaces) output_parent.append(div)
tn.text = titles[-1].text
tn.set(self.OPF('file-as'), 'boo') def render_block(self, node, root):
output_parent.append(tn) ts = node.get('textstyle', None)
classes = []
bs = node.get('blockstyle')
if bs in self.styles.block_style_map:
classes.append('bs%d'%self.styles.block_style_map[bs])
if ts in self.styles.text_style_map:
classes.append('ts%d'%self.styles.text_style_map[ts])
if classes:
root.set('class', ' '.join(classes))
objid = node.get('objid', None)
if objid:
root.set('id', objid)
root.text = node.text
self.root = root
self.parent = root
self.add_text_to = (self.parent, 'text')
for child in node:
self.process_child(child)
def add_text(self, text):
    '''
    Append ``text`` at the current insertion point.

    The insertion point is ``self.add_text_to``, an ``(element, attribute)``
    pair where the attribute is ``'text'`` or ``'tail'``. Empty or ``None``
    text is ignored; a slot that is still ``None`` is treated as empty.
    '''
    if not text:
        return
    target, slot = self.add_text_to
    current = getattr(target, slot)
    if current is None:
        current = ''
    setattr(target, slot, current + text)
def process_container(self, child, tgt):
    '''
    Render the inline container ``child`` into the new output element
    ``tgt`` and attach ``tgt`` to the current parent.

    ``tgt`` gets a ``tsN`` class when ``child`` carries text style
    attributes. The text and children of ``child`` end up inside ``tgt``;
    the tail of ``child`` ends up after it. On return the current parent is
    the same as on entry and the insertion point is the tail of ``tgt``.
    '''
    style_index = self.styles.get_text_styles(child)
    if style_index is not None:
        tgt.set('class', 'ts%d'%style_index)

    enclosing = self.parent
    enclosing.append(tgt)

    # Descend: everything inside the container is added to tgt
    self.parent = tgt
    self.add_text_to = (tgt, 'text')
    self.add_text(child.text)
    for grandchild in child:
        self.process_child(grandchild)

    # Ascend: whatever follows the container goes after tgt
    self.parent = enclosing
    self.add_text_to = (tgt, 'tail')
    self.add_text(child.tail)
def process_child(self, child):
    '''
    Convert one child element of a text block into HTML under the current
    parent.

    ``CR`` starts a ``<p>`` when text is still being added directly to the
    block root and inserts a ``<br>`` otherwise. ``P``, ``Span``,
    ``EmpLine`` and ``NoBR`` become ``<span>``; ``Sup``, ``Sub`` and
    ``Italic`` become ``<sup>``, ``<sub>`` and ``<i>``; ``CharButton``
    becomes ``<a>`` and ``Plot`` becomes ``<img>``. Any other tag is logged
    as unhandled and produces no output.
    '''
    tag = child.tag

    if tag == 'CR':
        if self.parent == self.root:
            para = self.root.makeelement('p')
            self.parent = para
            self.root.append(para)
            self.add_text_to = (para, 'text')
        else:
            line_break = self.parent.makeelement('br')
            self.parent.append(line_break)
            self.add_text_to = (line_break, 'tail')
        self.add_text(child.tail)
        return

    if tag in ('P', 'Span', 'EmpLine', 'NoBR'):
        span = self.root.makeelement('span')
        if tag == 'EmpLine':
            if child.get('emplineposition', 'before') == 'before':
                decoration = 'underline'
            else:
                decoration = 'overline'
            span.set('style', 'text-decoration: '+decoration)
        self.process_container(child, span)
        return

    # Containers that map one-to-one onto an HTML inline element
    inline = {'Sup': 'sup', 'Sub': 'sub', 'Italic': 'i'}.get(tag)
    if inline is not None:
        self.process_container(child, self.root.makeelement(inline))
        return

    if tag == 'CharButton':
        link = self.root.makeelement('a')
        target = child.get('refobj', None)
        if target in self.char_button_map:
            link.set('href', self.char_button_map[target])
        self.process_container(child, link)
        return

    if tag == 'Plot':
        # Sizes are scaled by 166/720 before being used as pixel dimensions
        width = self.styles.to_num(child.get('xsize', None), 166./720)
        height = self.styles.to_num(child.get('ysize', None), 166./720)
        img = self.root.makeelement('img')
        if width is not None:
            img.set('width', str(int(width)))
        if height is not None:
            img.set('height', str(int(height)))
        ref = child.get('refobj', None)
        if ref in self.plot_map:
            img.set('src', self.plot_map[ref])
        self.parent.append(img)
        self.add_text_to = (img, 'tail')
        self.add_text(child.tail)
        return

    self.log.warn('Unhandled Text element:', tag)
class Styles(etree.XSLTExtension):
    '''
    XSLT extension element that collects the text and block styles of the
    input document and writes them out as a CSS stylesheet.

    Every distinct set of CSS declarations is stored once, in
    ``text_styles`` or ``block_styles``; its position in that list is the
    number used in the generated class name (``tsN`` for text styles,
    ``bsN`` for block styles). ``text_style_map`` and ``block_style_map``
    map the objid of a style element to that number.
    '''

    def __init__(self):
        etree.XSLTExtension.__init__(self)
        self.text_styles, self.block_styles = [], []
        self.text_style_map, self.block_style_map = {}, {}
        # Fixed rules that are always emitted ahead of the generated ones
        self.CSS = textwrap.dedent('''
        .image_page { text-align:center }
        ''')

    def write(self, name='styles.css'):
        '''
        Write the fixed CSS followed by one rule per non-empty collected
        style to the file ``name``.
        '''
        def join(style):
            # Sorted so that the generated file does not depend on dict order
            decls = ['%s : %s;'%(k, v) for k, v in sorted(style.items())]
            if decls:
                # The last declaration of a rule carries no semicolon
                decls[-1] = decls[-1][:-1]
            return '\n\t'.join(decls)

        parts = [self.CSS]
        for styles, prefix in ((self.text_styles, 'ts'),
                               (self.block_styles, 'bs')):
            for i, style in enumerate(styles):
                if not style:
                    continue
                parts.append('.%s%d {\n\t%s\n}\n\n'%(prefix, i, join(style)))

        data = ''.join(parts)
        # The file is opened in binary mode, so make sure bytes are written
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        with open(name, 'wb') as f:
            f.write(data)

    def execute(self, context, self_node, input_node, output_parent):
        '''
        Register the style element ``input_node``. ``TextStyle`` elements
        are recorded in ``text_style_map`` (only when they yield at least
        one declaration); every other element is treated as a block style
        and recorded in ``block_style_map``. Nothing is added to the output.
        '''
        if input_node.tag == 'TextStyle':
            idx = self.get_text_styles(input_node)
            if idx is not None:
                self.text_style_map[input_node.get('objid')] = idx
        else:
            idx = self.get_block_styles(input_node)
            self.block_style_map[input_node.get('objid')] = idx

    @staticmethod
    def _register(style, registry):
        '''Return the index of ``style`` in ``registry``, adding it if new.'''
        try:
            return registry.index(style)
        except ValueError:
            registry.append(style)
            return len(registry) - 1

    def px_to_pt(self, px):
        '''
        Convert ``px`` to points by scaling with 72/166. Returns ``None``
        when ``px`` is ``None`` or not numeric.
        '''
        # NOTE(review): 166 is presumably the device resolution in dpi --
        # confirm.
        try:
            return float(px) * 72./166.
        except (TypeError, ValueError):
            return None

    def color(self, val):
        '''
        Convert a hexadecimal colour string to a CSS colour.

        The parsed integer is split into four bytes; from least to most
        significant they are used as r, g, b and a. Returns ``None`` when
        ``a`` is 255 or ``val`` cannot be parsed, ``rgb(...)`` when ``a`` is
        0, and ``rgba(...)`` with an opacity of ``1 - a/255`` otherwise.
        '''
        try:
            val = int(val, 16)
        except (TypeError, ValueError):
            return None
        r, g, b, a = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF
        if a == 255:
            return None
        if a == 0:
            return 'rgb(%d,%d,%d)'%(r, g, b)
        return 'rgba(%d,%d,%d,%f)'%(r, g, b, 1.-a/255.)

    def get_block_styles(self, node):
        '''
        Build the CSS declarations for the block style attributes of
        ``node`` and return their index in ``block_styles``. An index is
        always returned, even when no declaration could be derived.
        '''
        ans = {}
        for attr, props in (
                ('sidemargin', ('margin-left', 'margin-right')),
                ('topskip', ('margin-top',)),
                ('footskip', ('margin-bottom',)),
        ):
            pts = self.px_to_pt(node.get(attr, None))
            if pts is not None:
                for prop in props:
                    ans[prop] = '%fpt'%pts

        fw = self.px_to_pt(node.get('framewidth', None))
        if fw is not None:
            ans['border-width'] = '%fpt'%fw
            ans['border-style'] = 'solid'
        fc = self.color(node.get('framecolor', None))
        if fc is not None:
            ans['border-color'] = fc
        bc = self.color(node.get('bgcolor', None))
        if bc is not None:
            ans['background-color'] = bc
        return self._register(ans, self.block_styles)

    def to_num(self, val, factor=1.):
        '''
        Return ``float(val) * factor``, or ``None`` when ``val`` is ``None``
        or not numeric.
        '''
        try:
            return float(val)*factor
        except (TypeError, ValueError):
            return None

    def get_text_styles(self, node):
        '''
        Build the CSS declarations for the text style attributes of ``node``
        and return their index in ``text_styles``, or ``None`` when ``node``
        yields no declaration at all.
        '''
        ans = {}
        # fontsize and parindent are scaled by 0.1 to obtain points
        fs = self.to_num(node.get('fontsize', None), 0.1)
        if fs is not None:
            ans['font-size'] = '%fpt'%fs
        fw = self.to_num(node.get('fontweight', None))
        if fw is not None:
            ans['font-weight'] = ('bold' if fw >= 700 else 'normal')
        # TODO: fontfacename is not converted to font-family yet
        fg = self.color(node.get('textcolor', None))
        if fg is not None:
            ans['color'] = fg
        bg = self.color(node.get('textbgcolor', None))
        if bg is not None:
            ans['background-color'] = bg
        al = node.get('align', None)
        if al is not None:
            alignments = dict(head='left', center='center', foot='right')
            # Unknown alignment values fall back to left
            ans['text-align'] = alignments.get(al, 'left')
        # TODO: linespace is not converted to line-height yet
        pi = self.to_num(node.get('parindent', None), 0.1)
        if pi is not None:
            ans['text-indent'] = '%fpt'%pi
        if not ans:
            return None
        return self._register(ans, self.text_styles)
class LRFInput(InputFormatPlugin): class LRFInput(InputFormatPlugin):
@ -57,16 +367,46 @@ class LRFInput(InputFormatPlugin):
xml = d.to_xml(write_files=True) xml = d.to_xml(write_files=True)
parser = etree.XMLParser(recover=True, no_network=True) parser = etree.XMLParser(recover=True, no_network=True)
doc = etree.fromstring(xml, parser=parser) doc = etree.fromstring(xml, parser=parser)
char_button_map = {}
for x in doc.xpath('//CharButton[@refobj]'):
ro = x.get('refobj')
jump_button = doc.xpath('//*[@objid="%s"]'%ro)
if jump_button:
jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]')
if jump_to:
char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'),
jump_to[0].get('refobj'))
plot_map = {}
for x in doc.xpath('//Plot[@refobj]'):
ro = x.get('refobj')
image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro)
if image:
imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'%
image[0].get('refstream'))
if imgstr:
plot_map[ro] = imgstr[0].get('file')
self.log('Converting XML to HTML...') self.log('Converting XML to HTML...')
styledoc = etree.fromstring(P('templates/lrf.xsl', data=True)) styledoc = etree.fromstring(P('templates/lrf.xsl', data=True))
media_type, metadata = MediaType(), Metadata() media_type = MediaType()
extensions = { ('calibre', 'media-type') : media_type, styles = Styles()
('calibre', 'metadata'): metadata} text_block = TextBlock(styles, char_button_map, plot_map, log)
canvas = Canvas(doc, styles, text_block, log)
image_block = ImageBlock(canvas)
ruled_line = RuledLine()
extensions = {
('calibre', 'media-type') : media_type,
('calibre', 'text-block') : text_block,
('calibre', 'ruled-line') : ruled_line,
('calibre', 'styles') : styles,
('calibre', 'canvas') : canvas,
('calibre', 'image-block'): image_block,
}
transform = etree.XSLT(styledoc, extensions=extensions) transform = etree.XSLT(styledoc, extensions=extensions)
result = transform(doc) result = transform(doc)
with open('content.opf', 'wb') as f: with open('content.opf', 'wb') as f:
f.write(result) f.write(result)
styles.write()
return os.path.abspath('content.opf') return os.path.abspath('content.opf')