RB: output.

2025-07-09 03:04:10 -04:00 · 2009-05-23 19:26:21 -04:00 · 2009-05-23 19:26:21 -04:00 · 3659eb1b7a
commit 3659eb1b7a
parent afe9c08304
5 changed files with 384 additions and 24 deletions
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@ -99,7 +99,7 @@ class Writer(FormatWriter):
        publisher = ''
        isbn = ''

-        if metadata != None:
+        if metadata:
            if len(metadata.title) >= 1:
                title = metadata.title[0].value
            if len(metadata.creator) >= 1:
--- a/src/calibre/ebooks/rb/init.py
+++ b/src/calibre/ebooks/rb/init.py
@ -4,8 +4,23 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

+import os
+
 HEADER = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'

 class RocketBookError(Exception):
    pass

+
+def unique_name(name, used_names):
+    '''
+    Return a file name for an RB table-of-contents entry that does not clash
+    with any name in ``used_names``.
+
+    :param name: Candidate file name; any directory part is discarded.
+    :param used_names: Container of names already taken. It is only read,
+        never modified.
+    :return: The basename unchanged when it is shorter than 32 characters
+        and unused. Otherwise ``NNNN-<base>.<ext>`` where ``NNNN`` is a
+        zero padded counter, ``<base>`` is at most 22 characters and
+        ``<ext>`` at most 3. If every counter value is taken the last
+        candidate tried is returned.
+    '''
+    name = os.path.basename(name)
+    if len(name) < 32 and name not in used_names:
+        return name
+
+    # Split the extension off first so it is neither repeated inside the
+    # base part nor carries its leading dot into the pattern below.
+    base_name, ext = os.path.splitext(name)
+    base_name = base_name[:22]
+    ext = ext[1:4]
+    suffix = '.' + ext if ext else ''
+    for i in range(0, 9999):
+        # Counter is zero padded to four digits.
+        name = '%04d-%s%s' % (i, base_name, suffix)
+        if name not in used_names:
+            break
+    return name
--- a/src/calibre/ebooks/rb/output.py
+++ b/src/calibre/ebooks/rb/output.py
@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+from calibre.customize.conversion import OutputFormatPlugin
+from calibre.ebooks.rb.writer import RBWriter
+
+class RBOutput(OutputFormatPlugin):
+    '''
+    Output plugin that writes an OEB book as an RB file via RBWriter.
+    '''
+
+    name = 'RB Output'
+    author = 'John Schember'
+    file_type = 'rb'
+
+    def convert(self, oeb_book, output_path, input_plugin, opts, log):
+        '''
+        Write ``oeb_book`` to ``output_path``.
+
+        :param oeb_book: Book to write; its ``metadata`` attribute is passed
+            to the writer.
+        :param output_path: Either an object with a ``write`` attribute,
+            which is used as the output stream and left open, or a file
+            system path, which is opened in binary mode (creating missing
+            parent directories) and closed before returning.
+        :param input_plugin: Not used by this method.
+        :param opts: Passed through to RBWriter.
+        :param log: Passed through to RBWriter.
+        '''
+        close = not hasattr(output_path, 'write')
+        if close:
+            out_dir = os.path.dirname(output_path)
+            if out_dir != '' and not os.path.exists(out_dir):
+                os.makedirs(out_dir)
+            out_stream = open(output_path, 'wb')
+        else:
+            out_stream = output_path
+
+        try:
+            writer = RBWriter(opts, log)
+
+            # Start from an empty stream so stale content is not left behind.
+            out_stream.seek(0)
+            out_stream.truncate()
+
+            writer.write_content(oeb_book, out_stream, oeb_book.metadata)
+        finally:
+            # Only close streams opened here, and do so even when writing
+            # fails so the file handle is not leaked.
+            if close:
+                out_stream.close()
--- a/src/calibre/ebooks/rb/rbml.py
+++ b/src/calibre/ebooks/rb/rbml.py
@ -0,0 +1,166 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Transform OEB content into RB compatible markup.
+'''
+
+import os
+import re
+
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
+from calibre.ebooks.oeb.stylizer import Stylizer
+
+# XHTML tag names that RBMLizer.dump_text copies into the output as the
+# upper-cased tag of the same name.
+TAGS = [
+    'b',
+    'big',
+    'blockquote',
+    'br',
+    'center',
+    'code',
+    'div',
+    'h1',
+    'h2',
+    'h3',
+    'h4',
+    'h5',
+    'h6',
+    'hr',
+    'i',
+    'li',
+    'ol',
+    'p',
+    'pre',
+    'small',
+    'sub',
+    'sup',
+    'ul',
+]
+
+# Tags whose href attribute RBMLizer.dump_text turns into an <A HREF="...">
+# element.
+LINK_TAGS = [
+    'a',
+]
+
+# (CSS property, {property value: tag}) pairs. When an element's computed
+# style has one of the listed values, dump_text wraps its content in the
+# upper-cased tag.
+STYLES = [
+    ('font-weight', {'bold'   : 'b', 'bolder' : 'b'}),
+    ('font-style', {'italic' : 'i'}),
+    ('text-align', {'center' : 'center'}),
+]
+
+class RBMLizer(object):
+    '''
+    Convert the spine of an OEB book into one RB markup string.
+    '''
+
+    def __init__(self, name_map=None, ignore_tables=False):
+        '''
+        :param name_map: Mapping from image basenames found in ``src``
+            attributes to the names to emit in ``<IMG SRC>``. A fresh
+            dictionary is used when omitted.
+        :param ignore_tables: Stored on the instance; not consulted by the
+            methods of this class.
+        '''
+        # A literal ``{}`` default would be shared by every instance.
+        self.name_map = {} if name_map is None else name_map
+        self.ignore_tables = ignore_tables
+
+    def extract_content(self, oeb_book, opts):
+        '''
+        Remember ``oeb_book`` and ``opts`` on the instance and return the
+        markup produced by :meth:`mlize_spine`.
+        '''
+        oeb_book.logger.info('Converting XHTML to RB markup...')
+        self.oeb_book = oeb_book
+        self.opts = opts
+        return self.mlize_spine()
+
+    def mlize_spine(self):
+        '''
+        Return the markup for every spine item wrapped in a single
+        HTML/BODY skeleton, with unreferenced anchors removed.
+        '''
+        parts = [u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>']
+        for item in self.oeb_book.spine:
+            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
+            # Every spine item gets an anchor so links to the file resolve.
+            parts.append(self.add_page_anchor(item.href))
+            parts.append(self.dump_text(item.data.find(XHTML('body')), stylizer))
+        parts.append(u'</BODY></HTML>')
+        return self.clean_text(u''.join(parts))
+
+    def add_page_anchor(self, href):
+        '''
+        Return an empty named anchor whose name is the basename of ``href``
+        without its extension.
+        '''
+        href = os.path.splitext(os.path.basename(href))[0]
+        return u'<A NAME="%s"></A>' % href
+
+    def clean_text(self, text):
+        '''
+        Return ``text`` with every ``<A NAME="x"></A>`` removed for which
+        no ``<A HREF="#x">`` exists.
+        '''
+        anchors = set(re.findall(r'(?<=<A NAME=").+?(?="></A>)', text))
+        links = set(re.findall(r'(?<=<A HREF="#).+?(?=">)', text))
+        for unused in anchors.difference(links):
+            text = text.replace('<A NAME="%s"></A>' % unused, '')
+
+        return text
+
+    def dump_text(self, elem, stylizer, tag_stack=None):
+        '''
+        Recursively convert ``elem`` and its children to RB markup.
+
+        :param elem: Element to convert. Elements whose tag is not a string
+            or not in the XHTML namespace, and elements styled as not
+            displayed or hidden, produce an empty string.
+        :param stylizer: Object whose ``style(elem)`` gives the computed
+            style of an element.
+        :param tag_stack: List of currently open output tags, shared with
+            the recursive calls. A new list is created when omitted.
+        :return: The markup for ``elem`` followed by its tail text.
+        '''
+        # A literal ``[]`` default would leak state between top level calls
+        # if one of them were interrupted by an exception.
+        if tag_stack is None:
+            tag_stack = []
+
+        if not isinstance(elem.tag, basestring) \
+           or namespace(elem.tag) != XHTML_NS:
+            return u''
+
+        style = stylizer.style(elem)
+
+        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
+           or style['visibility'] == 'hidden':
+            return u''
+
+        tag = barename(elem.tag)
+        text = []
+        # Number of tags this element pushed on tag_stack.
+        tag_count = 0
+
+        # Tags without inner text that need an argument.
+        if tag == 'img':
+            src = elem.get('src')
+            # An image without a source cannot be referenced; skip it
+            # instead of failing on basename(None).
+            if src:
+                src = os.path.basename(src)
+                text.append('<IMG SRC="%s">' % self.name_map.get(src, src))
+
+        if tag in TAGS:
+            rb_tag = tag.upper()
+            tag_count += 1
+            text.append('<%s>' % rb_tag)
+            tag_stack.append(rb_tag)
+
+        if tag in LINK_TAGS:
+            href = elem.get('href')
+            if href:
+                if '://' not in href:
+                    if '#' in href:
+                        href = href.partition('#')[2]
+                    # Internal targets are named anchors, so they need the
+                    # '#' prefix; clean_text also relies on it to tell
+                    # which anchors are in use.
+                    href = '#' + os.path.splitext(os.path.basename(href))[0]
+                tag_count += 1
+                text.append('<A HREF="%s">' % href)
+                tag_stack.append('A')
+
+        # Anchor ids
+        id_name = elem.get('id')
+        if id_name:
+            text.append('<A NAME="%s"></A>' % os.path.splitext(id_name)[0])
+
+        # Process style information
+        for prop, value_map in STYLES:
+            style_tag = value_map.get(style[prop], None)
+            if style_tag:
+                style_tag = style_tag.upper()
+                tag_count += 1
+                text.append('<%s>' % style_tag)
+                tag_stack.append(style_tag)
+
+        # Process tags that contain text. Whitespace-only text is dropped.
+        if hasattr(elem, 'text') and elem.text is not None and elem.text.strip() != '':
+            text.append(elem.text)
+
+        for item in elem:
+            text.append(self.dump_text(item, stylizer, tag_stack))
+
+        # Take the tags opened by this element off the stack, in opening
+        # order; close_tags emits them innermost first.
+        first = len(tag_stack) - tag_count
+        opened = tag_stack[first:]
+        del tag_stack[first:]
+        text.append(self.close_tags(opened))
+
+        if hasattr(elem, 'tail') and elem.tail is not None and elem.tail.strip() != '':
+            text.append(elem.tail)
+
+        return u''.join(text)
+
+    def close_tags(self, tags):
+        '''
+        Return closing markup for ``tags``, last tag first. ``tags`` is
+        not modified.
+        '''
+        return u''.join(['</%s>' % tag for tag in reversed(tags)])
--- a/src/calibre/ebooks/rb/writer.py
+++ b/src/calibre/ebooks/rb/writer.py
@ -0,0 +1,143 @@
+import os.path
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+import struct
+import zlib
+
+import Image
+import cStringIO
+
+from calibre.ebooks.rb.rbml import RBMLizer
+from calibre.ebooks.rb import HEADER
+from calibre.ebooks.rb import unique_name
+from calibre.ebooks.oeb.base import OEB_IMAGES
+from calibre.constants import __appname__, __version__
+
+TEXT_RECORD_SIZE = 4096
+
+class TocItem(object):
+    '''
+    Plain record for one table-of-contents entry: its name, size and flags.
+    '''
+
+    def __init__(self, name, size, flags):
+        self.name, self.size, self.flags = name, size, flags
+
+
+class RBWriter(object):
+    '''
+    Write an OEB book to a stream as an RB file: header, table of contents,
+    then the info, compressed text, hidx and image sections.
+    '''
+
+    def __init__(self, opts, log):
+        self.opts = opts
+        self.log = log
+        # Original image basename -> name used for that image in the output.
+        self.name_map = {}
+
+    def write_content(self, oeb_book, out_stream, metadata=None):
+        '''
+        Write the complete RB file for ``oeb_book`` to ``out_stream``.
+
+        :param oeb_book: Book providing the manifest (images) and spine
+            (text).
+        :param out_stream: Seekable binary stream supporting ``write``,
+            ``tell`` and ``seek``.
+        :param metadata: Optional metadata used for the info section.
+        '''
+        # NOTE(review): if the metadata values are unicode, ``info`` is
+        # unicode too and len() counts characters rather than bytes --
+        # confirm how non-ASCII titles should be encoded.
+        info = self._info_section(metadata)
+        images = self._images(oeb_book.manifest)
+        text_size, chunks = self._text(oeb_book)
+        chunk_sizes = [len(x) for x in chunks]
+        hidx = ' '
+
+        # The text section starts with its own header: chunk count,
+        # uncompressed size and one 32 bit size per chunk.
+        text_section_size = 8 + (len(chunk_sizes) * 4) + sum(chunk_sizes)
+
+        # (name, size, flags) for every section, in the order written.
+        sections = [
+            ('info.info', len(info), 2),
+            ('index.html', text_section_size, 8),
+            ('index.hidx', len(hidx), 0),
+        ]
+        sections.extend([(name, len(data), 0) for name, data in images])
+        # Names are NUL padded / truncated to exactly 32 bytes.
+        toc_items = [TocItem(name.ljust(32, '\x00')[:32], size, flags)
+                     for name, size, flags in sections]
+
+        out_stream.write(HEADER)
+        out_stream.write(struct.pack('<I', 0))
+        out_stream.write(struct.pack('<IH', 0, 0))
+        out_stream.write(struct.pack('<I', 0x128))
+        # Placeholder at 0x1c, overwritten with the total size at the end.
+        out_stream.write(struct.pack('<I', 0))
+        # Zero fill from 0x20 up to 0x128, where the entry count is written.
+        out_stream.write(struct.pack('<I', 0) * ((0x128 - 0x20) // 4))
+        out_stream.write(struct.pack('<I', len(toc_items)))
+        # Section data starts right after the table (44 bytes per entry).
+        offset = out_stream.tell() + (len(toc_items) * 44)
+        for item in toc_items:
+            out_stream.write(item.name)
+            out_stream.write(struct.pack('<I', item.size))
+            out_stream.write(struct.pack('<I', offset))
+            out_stream.write(struct.pack('<I', item.flags))
+            offset += item.size
+
+        out_stream.write(info)
+
+        # Compressed text with proper heading
+        out_stream.write(struct.pack('<I', len(chunks)))
+        out_stream.write(struct.pack('<I', text_size))
+        for size in chunk_sizes:
+            out_stream.write(struct.pack('<I', size))
+        for chunk in chunks:
+            out_stream.write(chunk)
+
+        out_stream.write(hidx)
+        for name, data in images:
+            out_stream.write(data)
+
+        total_size = out_stream.tell()
+        out_stream.seek(0x1c)
+        out_stream.write(struct.pack('<I', total_size))
+
+    def _text(self, oeb_book):
+        '''
+        Return ``(size, records)``: the length of the cp1252 encoded markup
+        and the list of zlib compressed records it was split into.
+        '''
+        rbmlizer = RBMLizer(name_map=self.name_map, ignore_tables=self.opts.linearize_tables)
+        text = rbmlizer.extract_content(oeb_book, self.opts).encode('cp1252', 'xmlcharrefreplace')
+        return (len(text), self._compress_records(text, TEXT_RECORD_SIZE))
+
+    @staticmethod
+    def _compress_records(text, record_size):
+        '''
+        Split ``text`` into pieces of at most ``record_size`` bytes and
+        return each piece zlib compressed at level 9.
+        '''
+        # Stepping by record_size avoids the empty trailing record that a
+        # "length / size + 1" count produces for exact multiples.
+        return [zlib.compress(text[start:start + record_size], 9)
+                for start in range(0, len(text), record_size)]
+
+    def _images(self, manifest):
+        '''
+        Return ``(name, png_data)`` for every image in ``manifest``,
+        converted to greyscale PNG, and record the chosen names in
+        ``self.name_map``.
+        '''
+        images = []
+        used_names = set()
+
+        for item in manifest:
+            if item.media_type in OEB_IMAGES:
+                im = Image.open(cStringIO.StringIO(item.data)).convert('L')
+                data = cStringIO.StringIO()
+                im.save(data, 'PNG')
+                data = data.getvalue()
+
+                name = '%s.png' % os.path.splitext(os.path.basename(item.href))[0]
+                name = unique_name(name, used_names)
+                used_names.add(name)
+                self.name_map[os.path.basename(item.href)] = name
+
+                images.append((name, data))
+
+        return images
+
+    def _info_section(self, metadata):
+        '''
+        Return the text of the ``info.info`` section. Title and author
+        lines are included only when ``metadata`` provides them.
+        '''
+        text = 'TYPE=2\n'
+        if metadata:
+            if len(metadata.title) >= 1:
+                text += 'TITLE=%s\n' % metadata.title[0].value
+            if len(metadata.creator) >= 1:
+                from calibre.ebooks.metadata import authors_to_string
+                text += 'AUTHOR=%s\n' % authors_to_string([x.value for x in metadata.creator])
+        text += 'GENERATOR=%s - %s\n' % (__appname__, __version__)
+        text += 'PARSE=1\n'
+        text += 'OUTPUT=1\n'
+        text += 'BODY=index.html\n'
+
+        return text
+