class Canvas(etree.XSLTExtension):
    """XSLT extension element that renders LRF ``Canvas`` objects as HTML.

    A canvas that places exactly one ``ImageBlock`` is rendered as an image
    page (a ``div`` wrapping an ``img``). Any other canvas is merged with the
    ``Canvas`` siblings that directly follow it into one ``table``: one row
    per canvas and one cell per placed ``TextBlock``.
    """

    def __init__(self, doc, styles, text_block, log):
        """Store the collaborators used while rendering.

        :param doc: parsed LRF XML tree, used to resolve ``objid`` references
        :param styles: object providing ``to_num()`` for numeric attributes
        :param text_block: object providing ``render_block(node, root)``
        :param log: logger providing ``warn()``
        """
        # Initialise the base class, as the other extension classes in this
        # module do.
        etree.XSLTExtension.__init__(self)
        self.doc = doc
        self.styles = styles
        self.text_block = text_block
        self.log = log
        # objids of canvases that have already been rendered (either directly
        # or as part of a merged table), so they are never emitted twice.
        self.processed = set()

    def execute(self, context, self_node, input_node, output_parent):
        """Render the canvas identified by ``input_node`` into ``output_parent``.

        Does nothing when the node has no ``objid``, when that ``objid`` was
        already rendered, or when no matching ``Canvas`` exists in ``self.doc``.
        """
        cid = input_node.get('objid', None)
        if cid is None or cid in self.processed:
            return
        self.processed.add(cid)

        # Work on the node from self.doc instead of the one handed in by the
        # XSLT processor.
        # NOTE(review): presumably the handed-in node is a restricted proxy
        # that does not support sibling traversal -- confirm.
        # The objid is passed as an XPath variable so that quote characters
        # in it cannot break the expression.
        matches = self.doc.xpath('//Canvas[@objid=$oid]', oid=cid)
        if not matches:
            self.log.warn('Canvas with objid', cid, 'not found in document')
            return
        first = matches[0]

        objects = list(self.get_objects(first))
        if len(objects) == 1 and objects[0][0].tag == 'ImageBlock':
            self.image_page(first, objects[0][0], output_parent)
            return

        # Collect the run of Canvas siblings that directly follows this one.
        # Canvases without an objid are skipped; the run ends at the first
        # sibling that is not a Canvas.
        canvases = [first]
        for sibling in first.itersiblings():
            if sibling.tag != 'Canvas':
                break
            oid = sibling.get('objid', None)
            if oid is not None:
                canvases.append(sibling)
                self.processed.add(oid)

        table = etree.Element('table')
        table.text = '\n\t'
        for canvas in canvases:
            tr = table.makeelement('tr')
            tr.set('id', canvas.get('objid'))
            tr.tail = '\n\t'
            table.append(tr)
            # The placed objects of the first canvas were already resolved
            # above; do not query the document for them a second time.
            placed = objects if canvas is first else self.get_objects(canvas)
            for obj, _x, _y in placed:
                if obj.tag != 'TextBlock':
                    self.log.warn(obj.tag, 'elements in Canvas not supported')
                    continue
                td = table.makeelement('td')
                self.text_block.render_block(obj, td)
                tr.append(td)
        output_parent.append(table)

    def image_page(self, input_node, block, output_parent):
        """Append ``<div class="image_page"><img/></div>`` to ``output_parent``.

        :param input_node: node whose ``objid`` becomes the ``div`` id
            (``'scuzzy'`` is used when it has none)
        :param block: node carrying ``xsize``, ``ysize`` and ``refstream``
        :param output_parent: element that receives the generated ``div``
        """
        div = etree.Element('div')
        div.set('id', input_node.get('objid', 'scuzzy'))
        div.set('class', 'image_page')
        width = self.styles.to_num(block.get("xsize", None))
        height = self.styles.to_num(block.get("ysize", None))
        img = div.makeelement('img')
        if width is not None:
            img.set('width', str(int(width)))
        if height is not None:
            img.set('height', str(int(height)))
        ref = block.get('refstream', None)
        if ref is not None:
            # The image file name lives on the referenced ImageStream.
            streams = self.doc.xpath('//ImageStream[@objid=$oid]', oid=ref)
            if streams:
                src = streams[0].get('file', None)
                if src:
                    img.set('src', src)
        div.append(img)
        output_parent.append(div)

    def get_objects(self, node):
        """Yield ``(object, x, y)`` for every ``PutObj`` placed inside ``node``.

        Only ``PutObj`` elements that carry ``refobj``, ``x1`` and ``y1`` are
        considered. Entries whose reference cannot be resolved, or whose
        coordinates are not numeric, are skipped. Coordinates are yielded as
        ints.
        """
        for put in node.xpath('descendant::PutObj[@refobj and @x1 and @y1]'):
            objs = node.xpath('//*[@objid=$oid]', oid=put.get('refobj'))
            x = self.styles.to_num(put.get('x1'))
            y = self.styles.to_num(put.get('y1'))
            if objs and x is not None and y is not None:
                yield objs[0], int(x), int(y)
class TextBlock(etree.XSLTExtension):
    """XSLT extension element that converts an LRF ``TextBlock`` to HTML.

    Rendering is stateful: ``render_block`` sets ``self.root`` (the output
    element of the block), ``self.parent`` (the element currently receiving
    children) and ``self.add_text_to`` (an ``(element, 'text' | 'tail')``
    pair naming where the next run of text is appended).
    """

    # LRF tags that map one-to-one onto an HTML inline element.
    _INLINE_TAGS = {'Sup': 'sup', 'Sub': 'sub', 'Italic': 'i'}

    def __init__(self, styles, char_button_map, plot_map, log):
        """Keep the style registry, the link/image lookup maps and the logger.

        :param styles: provides ``block_style_map``, ``text_style_map``,
            ``get_text_styles()`` and ``to_num()``
        :param char_button_map: maps a ``CharButton`` ``refobj`` to an href
        :param plot_map: maps a ``Plot`` ``refobj`` to an image source
        :param log: logger providing ``warn()``
        """
        etree.XSLTExtension.__init__(self)
        self.styles = styles
        self.log = log
        self.char_button_map = char_button_map
        self.plot_map = plot_map

    def execute(self, context, self_node, input_node, output_parent):
        """Render a copy of ``input_node`` into a new ``div`` and append it."""
        block = deepcopy(input_node)
        container = etree.Element('div')
        self.render_block(block, container)
        output_parent.append(container)

    def render_block(self, node, root):
        """Fill ``root`` with the HTML rendering of the text block ``node``.

        ``root`` receives the ``bsN`` / ``tsN`` classes registered for the
        block's ``blockstyle`` / ``textstyle``, the block's ``objid`` as its
        id (when non-empty), and the converted content of ``node``.
        """
        text_style = node.get('textstyle', None)
        block_style = node.get('blockstyle')
        css = []
        if block_style in self.styles.block_style_map:
            css.append('bs%d'%self.styles.block_style_map[block_style])
        if text_style in self.styles.text_style_map:
            css.append('ts%d'%self.styles.text_style_map[text_style])
        if css:
            root.set('class', ' '.join(css))
        objid = node.get('objid', None)
        if objid:
            root.set('id', objid)
        root.text = node.text
        # Reset the rendering cursor to the top of the new block.
        self.root = self.parent = root
        self.add_text_to = (root, 'text')
        for child in node:
            self.process_child(child)

    def add_text(self, text):
        """Append ``text`` at the current cursor; empty/None text is ignored."""
        if not text:
            return
        target, attr = self.add_text_to
        current = getattr(target, attr)
        if current is None:
            current = ''
        setattr(target, attr, current + text)

    def process_container(self, child, tgt):
        """Render ``child`` and its descendants inside the new element ``tgt``.

        ``tgt`` is appended to the current parent and becomes the parent
        while the content of ``child`` is processed. Afterwards the previous
        parent is restored and the tail text of ``child`` is attached as the
        tail of ``tgt``.
        """
        style_idx = self.styles.get_text_styles(child)
        if style_idx is not None:
            tgt.set('class', 'ts%d'%style_idx)
        enclosing = self.parent
        enclosing.append(tgt)
        self.parent = tgt
        self.add_text_to = (tgt, 'text')
        self.add_text(child.text)
        for gchild in child:
            self.process_child(gchild)
        self.parent = enclosing
        self.add_text_to = (tgt, 'tail')
        self.add_text(child.tail)

    def process_child(self, child):
        """Convert one child element of a text block to its HTML equivalent.

        Elements with an unrecognised tag are reported through the logger
        and otherwise ignored.
        """
        tag = child.tag
        new = self.root.makeelement

        if tag == 'CR':
            if self.parent == self.root:
                # A line break directly inside the block opens a paragraph.
                para = new('p')
                self.parent = para
                self.root.append(para)
                self.add_text_to = (para, 'text')
            else:
                br = self.parent.makeelement('br')
                self.parent.append(br)
                self.add_text_to = (br, 'tail')
            self.add_text(child.tail)
            return

        if tag in ('P', 'Span', 'EmpLine', 'NoBR'):
            span = new('span')
            if tag == 'EmpLine':
                if child.get('emplineposition', 'before') == 'before':
                    decoration = 'underline'
                else:
                    decoration = 'overline'
                span.set('style', 'text-decoration: '+decoration)
            self.process_container(child, span)
            return

        if tag in self._INLINE_TAGS:
            self.process_container(child, new(self._INLINE_TAGS[tag]))
            return

        if tag == 'CharButton':
            link = new('a')
            oid = child.get('refobj', None)
            if oid in self.char_button_map:
                link.set('href', self.char_button_map[oid])
            self.process_container(child, link)
            return

        if tag == 'Plot':
            scale = 166./720
            xsize = self.styles.to_num(child.get('xsize', None), scale)
            ysize = self.styles.to_num(child.get('ysize', None), scale)
            img = new('img')
            for attr, size in (('width', xsize), ('height', ysize)):
                if size is not None:
                    img.set(attr, str(int(size)))
            ref = child.get('refobj', None)
            if ref in self.plot_map:
                img.set('src', self.plot_map[ref])
            self.parent.append(img)
            self.add_text_to = (img, 'tail')
            self.add_text(child.tail)
            return

        self.log.warn('Unhandled Text element:', tag)
class Styles(etree.XSLTExtension):
    """XSLT extension element that collects LRF styles and writes them as CSS.

    Each distinct set of CSS properties is stored once in ``text_styles`` /
    ``block_styles``; its list index is the number used in the generated
    ``.tsN`` / ``.bsN`` class selectors. ``text_style_map`` and
    ``block_style_map`` map the ``objid`` of a style element to that index.
    """

    def __init__(self):
        etree.XSLTExtension.__init__(self)
        self.text_styles, self.block_styles = [], []
        self.text_style_map, self.block_style_map = {}, {}
        self.CSS = textwrap.dedent('''
        .image_page { text-align:center }
        ''')

    def write(self, name='styles.css'):
        """Write the fixed CSS preamble and all collected styles to ``name``.

        Styles with no properties are skipped; the numbering of the remaining
        selectors is unaffected.
        """

        def join(style):
            # One "key : value;" declaration per line; the trailing semicolon
            # of the final declaration is dropped.
            ans = ['%s : %s;'%(k, v) for k, v in style.items()]
            if ans:
                ans[-1] = ans[-1][:-1]
            return '\n\t'.join(ans)

        with open(name, 'wb') as f:
            f.write(self.CSS)
            for (w, sel) in [(self.text_styles, 'ts'), (self.block_styles,
                'bs')]:
                for i, s in enumerate(w):
                    if not s:
                        continue
                    rsel = '.%s%d'%(sel, i)
                    s = join(s)
                    f.write(rsel + ' {\n\t' + s + '\n}\n\n')

    def execute(self, context, self_node, input_node, output_parent):
        """Register the style described by ``input_node`` under its ``objid``.

        ``TextStyle`` elements are registered as text styles (only when they
        yield at least one property); every other element is treated as a
        block style.
        """
        if input_node.tag == 'TextStyle':
            idx = self.get_text_styles(input_node)
            if idx is not None:
                self.text_style_map[input_node.get('objid')] = idx
        else:
            idx = self.get_block_styles(input_node)
            self.block_style_map[input_node.get('objid')] = idx

    def px_to_pt(self, px):
        """Return ``px`` scaled by 72/166 as a float, or None if not numeric.

        NOTE(review): the factor presumably reflects a 166 dpi source
        resolution -- confirm.
        """
        try:
            return float(px) * 72./166.
        except (TypeError, ValueError):
            # Missing attribute (None) or a non-numeric string.
            return None

    def color(self, val):
        """Translate a hexadecimal colour string into a CSS colour.

        The lowest byte is read as red, followed by green, blue and, in the
        highest byte, ``a``. Returns None when ``val`` cannot be parsed or
        ``a`` is 255, ``rgb(...)`` when ``a`` is 0, and ``rgba(...)`` with an
        alpha of ``1 - a/255`` otherwise.
        """
        try:
            val = int(val, 16)
        except (TypeError, ValueError):
            # Missing attribute (None) or not a hexadecimal number.
            return None
        r, g, b, a = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF
        if a == 255:
            return None
        if a == 0:
            return 'rgb(%d,%d,%d)'%(r,g,b)
        return 'rgba(%d,%d,%d,%f)'%(r,g,b,1.-a/255.)

    def get_block_styles(self, node):
        """Return the index in ``block_styles`` of the CSS derived from ``node``.

        Reads ``sidemargin``, ``topskip``, ``footskip``, ``framewidth``,
        ``framecolor`` and ``bgcolor``. The property dict is appended to
        ``block_styles`` when no equal dict is stored yet; an empty dict is
        registered like any other.
        """
        ans = {}
        sm = self.px_to_pt(node.get('sidemargin', None))
        if sm is not None:
            ans['margin-left'] = ans['margin-right'] = '%fpt'%sm
        ts = self.px_to_pt(node.get('topskip', None))
        if ts is not None:
            ans['margin-top'] = '%fpt'%ts
        fs = self.px_to_pt(node.get('footskip', None))
        if fs is not None:
            ans['margin-bottom'] = '%fpt'%fs
        fw = self.px_to_pt(node.get('framewidth', None))
        if fw is not None:
            ans['border-width'] = '%fpt'%fw
            ans['border-style'] = 'solid'
        fc = self.color(node.get('framecolor', None))
        if fc is not None:
            ans['border-color'] = fc
        bc = self.color(node.get('bgcolor', None))
        if bc is not None:
            ans['background-color'] = bc
        if ans not in self.block_styles:
            self.block_styles.append(ans)
        return self.block_styles.index(ans)

    def to_num(self, val, factor=1.):
        """Return ``float(val) * factor``, or None when ``val`` is not numeric."""
        try:
            return float(val)*factor
        except (TypeError, ValueError):
            # Missing attribute (None) or a non-numeric string.
            return None

    def get_text_styles(self, node):
        """Return the index in ``text_styles`` of the CSS derived from ``node``.

        Reads ``fontsize`` and ``parindent`` (both scaled by 0.1 and emitted
        in pt), ``fontweight`` (bold from 700 upwards), ``textcolor``,
        ``textbgcolor`` and ``align``. Returns None when the node yields no
        property at all.
        """
        ans = {}
        fs = self.to_num(node.get('fontsize', None), 0.1)
        if fs is not None:
            ans['font-size'] = '%fpt'%fs
        fw = self.to_num(node.get('fontweight', None))
        if fw is not None:
            ans['font-weight'] = ('bold' if fw >= 700 else 'normal')
        # TODO: font-family (fontfacename) and line-height (linespace) are
        # not translated yet.
        fg = self.color(node.get('textcolor', None))
        if fg is not None:
            ans['color'] = fg
        bg = self.color(node.get('textbgcolor', None))
        if bg is not None:
            ans['background-color'] = bg
        al = node.get('align', None)
        if al is not None:
            # Unknown alignment values fall back to left.
            alignments = dict(head='left', center='center', foot='right')
            ans['text-align'] = alignments.get(al, 'left')
        pi = self.to_num(node.get('parindent', None), 0.1)
        if pi is not None:
            ans['text-indent'] = '%fpt'%pi
        if not ans:
            return None
        if ans not in self.text_styles:
            self.text_styles.append(ans)
        return self.text_styles.index(ans)