RB: output.

2025-07-09 03:04:10 -04:00 · 2009-05-23 19:26:21 -04:00 · 2009-05-23 19:26:21 -04:00 · 3659eb1b7a
commit 3659eb1b7a
parent afe9c08304
5 changed files with 384 additions and 24 deletions
--- a/src/calibre/ebooks/pdb/ereader/writer.py
+++ b/src/calibre/ebooks/pdb/ereader/writer.py
@ -99,7 +99,7 @@ class Writer(FormatWriter):
        publisher = ''
        isbn = ''
-        if metadata != None:
+        if metadata:
            if len(metadata.title) >= 1:
                title = metadata.title[0].value
            if len(metadata.creator) >= 1:
--- a/src/calibre/ebooks/rb/init.py
+++ b/src/calibre/ebooks/rb/init.py
@ -4,8 +4,23 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import os
 HEADER = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'
 class RocketBookError(Exception):
    '''Exception type defined by the rb package.'''
 def unique_name(name, used_names):
    '''
    Return a file name that is shorter than 32 characters and is not
    already present in used_names.

    :param name: File name or path; only its base name is considered.
    :param used_names: Container of names that are already taken. It is only
        queried with ``in`` and is never modified here.
    :return: The base name itself when it is short enough and unused.
        Otherwise a name of the form ``NNNN-<base>.<ext>`` where ``NNNN`` is
        a zero padded counter, ``<base>`` is at most 22 characters of the
        name without its extension and ``<ext>`` is at most 3 characters.
        If every counter value is taken the last candidate is returned.
    '''
    name = os.path.basename(name)
    if len(name) < 32 and name not in used_names:
        return name

    stem, ext = os.path.splitext(name)
    # splitext() keeps the leading dot; drop it and keep at most 3 characters
    # so that the dot is not doubled when the name is rebuilt.
    ext = ext[1:4]
    base_name = stem[:22]
    for i in range(0, 9999):
        # str.rjust() takes (width, fillchar), in that order.
        name = '%s-%s' % (str(i).rjust(4, '0'), base_name)
        if ext:
            name += '.' + ext
        if name not in used_names:
            break
    return name
--- a/src/calibre/ebooks/rb/output.py
+++ b/src/calibre/ebooks/rb/output.py
@ -0,0 +1,36 @@
 # -*- coding: utf-8 -*-
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import os
 from calibre.customize.conversion import OutputFormatPlugin
 from calibre.ebooks.rb.writer import RBWriter
 class RBOutput(OutputFormatPlugin):
    '''
    Output format plugin that writes a book as an ``rb`` file using RBWriter.
    '''

    name = 'RB Output'
    author = 'John Schember'
    file_type = 'rb'

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write oeb_book to output_path.

        :param oeb_book: Book to convert; its ``metadata`` attribute is
            passed on to the writer.
        :param output_path: Either a file system path or an object with a
            ``write`` attribute. A path is opened in binary mode (missing
            parent directories are created first) and closed again when the
            conversion finishes. A stream passed in by the caller is left
            open; it must support ``seek`` and ``truncate``.
        :param input_plugin: Not used by this method.
        :param opts: Conversion options handed to RBWriter.
        :param log: Logger handed to RBWriter.
        '''
        close = False
        if not hasattr(output_path, 'write'):
            close = True
            out_dir = os.path.dirname(output_path)
            if out_dir != '' and not os.path.exists(out_dir):
                os.makedirs(out_dir)
            out_stream = open(output_path, 'wb')
        else:
            out_stream = output_path

        try:
            writer = RBWriter(opts, log)

            # Start from an empty stream so no stale data is left behind.
            out_stream.seek(0)
            out_stream.truncate()

            writer.write_content(oeb_book, out_stream, oeb_book.metadata)
        finally:
            # Only close streams opened here, and do so even when the
            # writer raises so the file handle is not leaked.
            if close:
                out_stream.close()
--- a/src/calibre/ebooks/rb/rbml.py
+++ b/src/calibre/ebooks/rb/rbml.py
@ -0,0 +1,166 @@
 # -*- coding: utf-8 -*-
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 '''
 Transform OEB content into RB compatible markup.
 '''
 import os
 import re
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
 from calibre.ebooks.oeb.stylizer import Stylizer
 # XHTML tag names that RBMLizer.dump_text emits as the same tag, upper-cased.
 TAGS = [
    'b',
    'big',
    'blockquote',
    'br',
    'center',
    'code',
    'div',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'hr',
    'i',
    'li',
    'ol',
    'p',
    'pre',
    'small',
    'sub',
    'sup',
    'ul',
 ]
 # XHTML tag names whose href attribute is turned into an <A HREF=...> link.
 LINK_TAGS = [
    'a',
 ]
 # (CSS property, {property value: tag name}) pairs. When an element's
 # computed style has one of the listed values, dump_text wraps its content
 # in the mapped tag (upper-cased).
 STYLES = [
    ('font-weight', {'bold'   : 'b', 'bolder' : 'b'}),
    ('font-style', {'italic' : 'i'}),
    ('text-align', {'center' : 'center'}),
 ]
 class RBMLizer(object):
    '''
    Convert the XHTML spine of an OEB book into a single RB markup string.

    Elements whose tag is listed in TAGS are emitted as the upper-cased tag,
    CSS values listed in STYLES are emitted as the mapped tag, elements
    listed in LINK_TAGS become ``<A HREF=...>`` links and ``id`` attributes
    become ``<A NAME=...></A>`` anchors.
    '''

    def __init__(self, name_map=None, ignore_tables=False):
        '''
        :param name_map: Mapping from an image's base name to the name that
            is written into ``<IMG SRC=...>``. Names without an entry are
            used unchanged.
        :param ignore_tables: Stored on the instance; not read by any method
            of this class.
        '''
        # Test against None rather than truthiness: an empty dict handed in
        # by the caller must be kept as the very same object.
        self.name_map = {} if name_map is None else name_map
        self.ignore_tables = ignore_tables

    def extract_content(self, oeb_book, opts):
        '''
        Remember oeb_book and opts on the instance and return the RB markup
        for the whole spine.
        '''
        oeb_book.logger.info('Converting XHTML to RB markup...')
        self.oeb_book = oeb_book
        self.opts = opts
        return self.mlize_spine()

    def mlize_spine(self):
        '''
        Return the markup of every spine item, wrapped in a minimal HTML
        skeleton, with unreferenced anchors removed.
        '''
        parts = [u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>']
        for item in self.oeb_book.spine:
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
            # Every spine item starts with an anchor named after its file so
            # that links to the file as a whole have a target.
            parts.append(self.add_page_anchor(item.href))
            parts.append(self.dump_text(item.data.find(XHTML('body')), stylizer))
        parts.append(u'</BODY></HTML>')
        return self.clean_text(u''.join(parts))

    def add_page_anchor(self, href):
        '''
        Return an anchor named after the base name of href without its
        extension.
        '''
        href = os.path.splitext(os.path.basename(href))[0]
        return u'<A NAME="%s"></A>' % href

    def clean_text(self, text):
        '''
        Return text with every ``<A NAME="x"></A>`` anchor removed for which
        no ``<A HREF="#x">`` link exists.
        '''
        anchors = set(re.findall(r'(?<=<A NAME=").+?(?="></A>)', text))
        links = set(re.findall(r'(?<=<A HREF="#).+?(?=">)', text))
        for unused in anchors.difference(links):
            text = text.replace('<A NAME="%s"></A>' % unused, '')
        return text

    def _tail_text(self, elem):
        # Tail text of elem, or u'' when it is missing or only whitespace.
        tail = getattr(elem, 'tail', None)
        if tail is not None and tail.strip() != '':
            return tail
        return u''

    def dump_text(self, elem, stylizer, tag_stack=None):
        '''
        Return the RB markup for elem, its descendants and its tail text.

        :param elem: Element to serialize. Nodes whose tag is not a string or
            is outside the XHTML namespace, and elements styled as not
            displayed or hidden, contribute only their tail text.
        :param stylizer: Object whose ``style(elem)`` result is indexed by
            CSS property name.
        :param tag_stack: List of the tags currently open; used by the
            recursion. A new list is created when omitted.
        '''
        if tag_stack is None:
            tag_stack = []

        # The tail text belongs to the parent, so it is kept even when the
        # node itself is skipped.
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return self._tail_text(elem)

        style = stylizer.style(elem)
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            return self._tail_text(elem)

        parts = []
        tag = barename(elem.tag)
        # Number of tags this call pushed onto tag_stack.
        tag_count = 0

        # Process tags that need special processing and that do not have
        # inner text. Usually these require an argument.
        if tag == 'img':
            src = elem.get('src')
            # An image without a src attribute has nothing to reference.
            if src:
                src = os.path.basename(src)
                parts.append('<IMG SRC="%s">' % self.name_map.get(src, src))

        rb_tag = tag.upper() if tag in TAGS else None
        if rb_tag:
            tag_count += 1
            parts.append('<%s>' % rb_tag)
            tag_stack.append(rb_tag)

        if tag in LINK_TAGS:
            href = elem.get('href')
            if href:
                if '://' not in href:
                    if '#' in href:
                        href = href.partition('#')[2]
                    # Internal targets are the NAME anchors written by
                    # add_page_anchor() and the id handling below; the '#'
                    # prefix is also what clean_text() looks for when it
                    # decides which anchors are in use.
                    href = '#' + os.path.splitext(os.path.basename(href))[0]
                tag_count += 1
                parts.append('<A HREF="%s">' % href)
                tag_stack.append('A')

        # Anchor ids
        id_name = elem.get('id')
        if id_name:
            parts.append('<A NAME="%s"></A>' % os.path.splitext(id_name)[0])

        # Process style information
        for prop, value_map in STYLES:
            style_tag = value_map.get(style[prop], None)
            if style_tag:
                style_tag = style_tag.upper()
                tag_count += 1
                parts.append('<%s>' % style_tag)
                tag_stack.append(style_tag)

        # Process tags that contain text.
        if hasattr(elem, 'text') and elem.text != None and elem.text.strip() != '':
            parts.append(elem.text)

        for item in elem:
            parts.append(self.dump_text(item, stylizer, tag_stack))

        # Take the tags opened by this call off the stack (in opening order)
        # and close them; close_tags() emits them last-opened first.
        split = len(tag_stack) - tag_count
        opened = tag_stack[split:]
        del tag_stack[split:]
        parts.append(self.close_tags(opened))

        parts.append(self._tail_text(elem))
        return u''.join(parts)

    def close_tags(self, tags):
        '''
        Return closing tags for the names in tags, last entry first.

        The list is emptied in the process.
        '''
        text = u''
        while tags:
            text += '</%s>' % tags.pop()
        return text
--- a/src/calibre/ebooks/rb/writer.py
+++ b/src/calibre/ebooks/rb/writer.py
@ -0,0 +1,143 @@
 import os.path
 # -*- coding: utf-8 -*-
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import os
 import struct
 import zlib
 import Image
 import cStringIO
 from calibre.ebooks.rb.rbml import RBMLizer
 from calibre.ebooks.rb import HEADER
 from calibre.ebooks.rb import unique_name
 from calibre.ebooks.oeb.base import OEB_IMAGES
 from calibre.constants import __appname__, __version__
 TEXT_RECORD_SIZE = 4096
 class TocItem(object):
    '''Plain record holding the name, size and flags of one TOC entry.'''

    def __init__(self, name, size, flags):
        self.name, self.size, self.flags = name, size, flags
 class RBWriter(object):
    '''
    Write an OEB book to a stream as a sequence of named pages: an info
    page, the compressed text, an index page and one page per image,
    preceded by a header and a table of contents.
    '''

    def __init__(self, opts, log):
        '''
        :param opts: Conversion options; ``linearize_tables`` is read here
            and the object is passed on to RBMLizer.
        :param log: Logger; stored on the instance.
        '''
        self.opts = opts
        self.log = log
        # Base name of each source image -> name stored in the output.
        # Filled by _images() and handed to RBMLizer by _text().
        self.name_map = {}

    def write_content(self, oeb_book, out_stream, metadata=None):
        '''
        Write the complete file for oeb_book to out_stream.

        :param oeb_book: Book whose manifest images and spine are written.
        :param out_stream: Writable binary stream that supports ``tell`` and
            ``seek``. After the call its position is just past the total
            size field at offset 0x1c, not at the end of the data.
        :param metadata: Optional metadata with ``title`` and ``creator``
            lists, used for the info page.
        '''
        info = self._info_section(metadata)
        # Images are handled before the text so that name_map is complete
        # by the time the markup is generated.
        images = self._images(oeb_book.manifest)
        text_size, chunks = self._text(oeb_book)
        chunk_sizes = [len(chunk) for chunk in chunks]
        hidx = ' '

        # The text page consists of the chunk count (4 bytes), the
        # uncompressed size (4 bytes), one 4 byte length per chunk and the
        # compressed chunks themselves.
        text_page_size = 8 + (len(chunk_sizes) * 4) + sum(chunk_sizes)

        # (name, size, flags) for every page, in the order the page data is
        # written further down.
        pages = [
            ('info.info', len(info), 2),
            ('index.html', text_page_size, 8),
            ('index.hidx', len(hidx), 0),
        ]
        pages.extend((name, len(data), 0) for name, data in images)
        toc_items = [TocItem(name.ljust(32, '\x00')[:32], size, flags)
                     for name, size, flags in pages]
        page_count = len(toc_items)

        out_stream.write(HEADER)
        out_stream.write(struct.pack('<I', 0))
        out_stream.write(struct.pack('<IH', 0, 0))
        out_stream.write(struct.pack('<I', 0x128))
        # Placeholder for the total file size at offset 0x1c; patched once
        # everything has been written.
        out_stream.write(struct.pack('<I', 0))
        # Zero fill from 0x20 up to 0x128.
        out_stream.write(struct.pack('<I', 0) * ((0x128 - 0x20) // 4))
        out_stream.write(struct.pack('<I', page_count))

        # Every TOC entry is 44 bytes (32 byte name + three 4 byte integers);
        # the page data starts right after the last entry.
        offset = out_stream.tell() + (len(toc_items) * 44)
        for item in toc_items:
            out_stream.write(item.name)
            out_stream.write(struct.pack('<I', item.size))
            out_stream.write(struct.pack('<I', offset))
            out_stream.write(struct.pack('<I', item.flags))
            offset += item.size

        out_stream.write(info)

        # Compressed text with proper heading
        out_stream.write(struct.pack('<I', len(chunks)))
        out_stream.write(struct.pack('<I', text_size))
        for size in chunk_sizes:
            out_stream.write(struct.pack('<I', size))
        for chunk in chunks:
            out_stream.write(chunk)

        out_stream.write(hidx)
        for name, data in images:
            out_stream.write(data)

        total_size = out_stream.tell()
        out_stream.seek(0x1c)
        out_stream.write(struct.pack('<I', total_size))

    def _text(self, oeb_book):
        '''
        Return ``(size, chunks)`` for the book text: the length of the
        cp1252 encoded markup and that markup split into TEXT_RECORD_SIZE
        pieces, each compressed with zlib.
        '''
        rbmlizer = RBMLizer(name_map=self.name_map, ignore_tables=self.opts.linearize_tables)
        text = rbmlizer.extract_content(oeb_book, self.opts).encode('cp1252', 'xmlcharrefreplace')
        size = len(text)

        # Stepping through the start offsets avoids an extra empty chunk when
        # the length is an exact multiple of the record size; max() keeps a
        # single (empty) chunk for empty text.
        pages = [zlib.compress(text[start:start + TEXT_RECORD_SIZE], 9)
                 for start in range(0, max(size, 1), TEXT_RECORD_SIZE)]

        return (size, pages)

    def _images(self, manifest):
        '''
        Return a list of ``(name, data)`` pairs, one per manifest item whose
        media type is in OEB_IMAGES. Each image is converted to mode 'L' and
        saved as PNG under a name obtained from unique_name(); the mapping
        from the original base name to that name is recorded in name_map.
        '''
        images = []
        used_names = set()

        for item in manifest:
            if item.media_type in OEB_IMAGES:
                im = Image.open(cStringIO.StringIO(item.data)).convert('L')
                buf = cStringIO.StringIO()
                im.save(buf, 'PNG')
                data = buf.getvalue()

                name = '%s.png' % os.path.splitext(os.path.basename(item.href))[0]
                name = unique_name(name, used_names)
                used_names.add(name)
                self.name_map[os.path.basename(item.href)] = name

                images.append((name, data))

        return images

    def _info_section(self, metadata):
        '''
        Return the text of the info page as ``KEY=value`` lines. TITLE and
        AUTHOR are only included when metadata provides them.
        '''
        lines = ['TYPE=2\n']
        if metadata:
            if len(metadata.title) >= 1:
                lines.append('TITLE=%s\n' % metadata.title[0].value)
            if len(metadata.creator) >= 1:
                from calibre.ebooks.metadata import authors_to_string
                lines.append('AUTHOR=%s\n' % authors_to_string([x.value for x in metadata.creator]))
        lines.append('GENERATOR=%s - %s\n' % (__appname__, __version__))
        lines.append('PARSE=1\n')
        lines.append('OUTPUT=1\n')
        lines.append('BODY=index.html\n')

        return ''.join(lines)