RocketBook (rb) input.

This commit is contained in:
John Schember 2009-05-23 11:55:53 -04:00
parent 9b890e279d
commit 503b697653
5 changed files with 168 additions and 2 deletions

View File

@ -321,6 +321,7 @@ from calibre.ebooks.lit.input import LITInput
from calibre.ebooks.fb2.input import FB2Input from calibre.ebooks.fb2.input import FB2Input
from calibre.ebooks.fb2.output import FB2Output from calibre.ebooks.fb2.output import FB2Output
from calibre.ebooks.odt.input import ODTInput from calibre.ebooks.odt.input import ODTInput
from calibre.ebooks.rb.input import RBInput
from calibre.ebooks.rtf.input import RTFInput from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.html.input import HTMLInput from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.comic.input import ComicInput from calibre.ebooks.comic.input import ComicInput
@ -351,7 +352,7 @@ from calibre.devices.bebook.driver import BEBOOK, BEBOOK_MINI
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput, plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput, TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
FB2Input, FB2Output, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput, FB2Input, FB2Output, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
PMLOutput, MOBIOutput, PDBOutput, LRFOutput, LITOutput] PMLOutput, MOBIOutput, PDBOutput, LRFOutput, LITOutput, RBInput]
plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY, plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
EB600, JETBOOK, BEBOOK, BEBOOK_MINI] EB600, JETBOOK, BEBOOK, BEBOOK_MINI]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
# Signature expected at the very start of a RocketBook file. The reader
# compares it against the first 14 bytes of the stream before parsing.
HEADER = '\xb0\x0c' * 2 + '\x02\x00' + 'NUVO' + '\x00' * 4
class RocketBookError(Exception):
    """Error raised for RocketBook files that cannot be processed."""

View File

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
from calibre.ebooks.rb.reader import Reader
from calibre.customize.conversion import InputFormatPlugin
class RBInput(InputFormatPlugin):
    """Input plugin that hands RocketBook (.rb) files to ``Reader``."""

    name = 'RB Input'
    author = 'John Schember'
    description = 'Convert RB files to HTML'
    file_types = set(['rb'])

    def convert(self, stream, options, file_ext, log, accelerators):
        """Extract the book into the current working directory.

        :param stream: Open file object for the .rb file.
        :param options: Conversion options; only ``input_encoding`` is read.
        :param file_ext: Unused here.
        :param log: Logger passed through to ``Reader``.
        :param accelerators: Unused here.
        :return: Whatever ``Reader.extract_content`` returns (the path of
            the generated OPF file).
        """
        rb_reader = Reader(stream, log, options.input_encoding)
        return rb_reader.extract_content(os.getcwd())

View File

@ -0,0 +1,131 @@
import os.path
import zlib
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import struct
from urllib import unquote as urlunquote
from calibre import CurrentDir
from calibre.ebooks.rb import HEADER
from calibre.ebooks.rb import RocketBookError
from calibre.ebooks.metadata.rb import get_metadata
from calibre.ebooks.metadata.opf2 import OPFCreator
class RBToc(list):
    """List of ``RBToc.Item`` entries read from a RocketBook table of contents."""

    class Item(object):
        """One table-of-contents entry: a name plus three integer fields."""

        def __init__(self, name='', size=0, offset=0, flags=0):
            """Store the four fields unchanged as attributes of the same name."""
            self.name, self.size = name, size
            self.offset, self.flags = offset, flags
class Reader(object):
    """Extract the pages and images stored in a RocketBook (.rb) file.

    Constructing a ``Reader`` validates the stream, reads the book metadata
    and parses the table of contents. ``extract_content`` then writes the
    supported entries to disk together with an OPF file describing them.
    """

    def __init__(self, stream, log, encoding=None):
        """Validate ``stream`` and load its metadata and table of contents.

        :param stream: Seekable binary file object holding the .rb data.
        :param log: Logger supplied by the caller; stored on the instance
            but not otherwise used by this class.
        :param encoding: Codec used to decode page text. ``None`` selects
            cp1252.
        :raises RocketBookError: If ``verify_file`` rejects the stream.
        """
        self.stream = stream
        self.log = log
        self.encoding = encoding

        self.verify_file()

        self.mi = get_metadata(self.stream)
        self.toc = self.get_toc()

    def read_i32(self):
        """Read 4 bytes at the current position as a little-endian unsigned int."""
        return struct.unpack('<I', self.stream.read(4))[0]

    def verify_file(self):
        """Check the file signature and the size recorded in the header.

        :raises RocketBookError: If the first 14 bytes are not ``HEADER`` or
            the size stored at offset 28 differs from the real stream size.
        """
        self.stream.seek(0)
        if self.stream.read(14) != HEADER:
            # Not every file-like object carries a ``name`` attribute; fall
            # back to a placeholder so the intended error is still raised.
            stream_name = getattr(self.stream, 'name', '<stream>')
            raise RocketBookError('Could not read file: %s. Does not contain a valid RocketBook Header.' % stream_name)

        self.stream.seek(28)
        size = self.read_i32()
        self.stream.seek(0, os.SEEK_END)
        real_size = self.stream.tell()
        if size != real_size:
            raise RocketBookError('File is corrupt. The file size recorded in the header does not match the actual file size.')

    def get_toc(self):
        """Parse the table of contents.

        The 32-bit value at offset 24 gives the position of the table. The
        table starts with an entry count; each entry is a 32-byte NUL-padded,
        URL-quoted name followed by three 32-bit values: size, offset, flags.

        :return: An ``RBToc`` holding one ``RBToc.Item`` per entry.
        """
        self.stream.seek(24)
        toc_offset = self.read_i32()

        self.stream.seek(toc_offset)
        pages = self.read_i32()

        toc = RBToc()
        for _i in range(pages):
            name = urlunquote(self.stream.read(32).strip('\x00'))
            size = self.read_i32()
            offset = self.read_i32()
            flags = self.read_i32()
            toc.append(RBToc.Item(name=name, size=size, offset=offset, flags=flags))

        return toc

    def _output_path(self, output_dir, name):
        """Return the path for ``name`` inside ``output_dir``.

        Entry names come from the file being converted, so refuse any name
        (absolute, or containing ``..``) that would resolve to a location
        outside ``output_dir``.

        :raises RocketBookError: If the resolved path escapes ``output_dir``.
        """
        # join(..., '') guarantees exactly one trailing separator, which also
        # keeps the prefix test correct when output_dir is the filesystem root.
        root = os.path.join(os.path.abspath(output_dir), '')
        target = os.path.abspath(os.path.join(root, name))
        if not target.startswith(root):
            raise RocketBookError('Refusing to extract %r: it would be written outside the output directory.' % name)
        return os.path.join(output_dir, name)

    def get_text(self, toc_item, output_dir):
        """Decompress one text entry and write it to ``output_dir`` as UTF-8.

        Only entries whose flags equal 8 are handled. Their data consists of
        a chunk count, the uncompressed size, one compressed size per chunk,
        and then the zlib-compressed chunks themselves.

        :param toc_item: Entry providing ``name``, ``offset`` and ``flags``.
        :param output_dir: Directory the file is written into.
        :return: ``True`` if a file was written, ``False`` if the entry was
            skipped because of its flags.
        :raises RocketBookError: If the entry name points outside
            ``output_dir``.
        """
        if toc_item.flags != 8:
            return False

        self.stream.seek(toc_item.offset)
        count = self.read_i32()
        self.read_i32()  # Uncompressed size.
        chunk_sizes = [self.read_i32() for _i in range(count)]

        # Decode only after all chunks are joined: a chunk boundary can fall
        # in the middle of a multi-byte character, which would make
        # chunk-by-chunk decoding fail for encodings such as UTF-8.
        raw = b''.join(zlib.decompress(self.stream.read(size)) for size in chunk_sizes)
        encoding = 'cp1252' if self.encoding is None else self.encoding
        output = raw.decode(encoding)

        with open(self._output_path(output_dir, toc_item.name), 'wb') as html:
            html.write(output.encode('utf-8'))
        return True

    def get_image(self, toc_item, output_dir):
        """Copy one image entry verbatim into ``output_dir``.

        Only entries whose flags equal 0 are handled.

        :param toc_item: Entry providing ``name``, ``offset``, ``size`` and
            ``flags``.
        :param output_dir: Directory the file is written into.
        :return: ``True`` if a file was written, ``False`` if the entry was
            skipped because of its flags.
        :raises RocketBookError: If the entry name points outside
            ``output_dir``.
        """
        if toc_item.flags != 0:
            return False

        self.stream.seek(toc_item.offset)
        data = self.stream.read(toc_item.size)

        with open(self._output_path(output_dir, toc_item.name), 'wb') as img:
            img.write(data)
        return True

    def extract_content(self, output_dir):
        """Write every html and png entry to ``output_dir`` and build the OPF.

        Entries are selected by name suffix (case-insensitive). An entry is
        listed in the OPF only if its file was actually written, so the
        manifest never refers to a file that does not exist.

        :return: Path of the generated ``metadata.opf``.
        """
        html = []
        images = []

        for item in self.toc:
            lowered = item.name.lower()
            if lowered.endswith('html'):
                if self.get_text(item, output_dir):
                    html.append(item.name)
            elif lowered.endswith('png'):
                if self.get_image(item, output_dir):
                    images.append(item.name)

        return self.create_opf(output_dir, html, images)

    def create_opf(self, output_dir, pages, images):
        """Write ``metadata.opf`` into ``output_dir``.

        :param pages: Names of the extracted html files; these form the
            spine and are listed in the manifest.
        :param images: Names of the extracted images; manifest only.
        :return: Path of the written OPF file.
        """
        with CurrentDir(output_dir):
            opf = OPFCreator(output_dir, self.mi)

            opf.create_manifest([(name, None) for name in pages + images])
            opf.create_spine(pages)
            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')

View File

@ -1,5 +1,4 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import with_statement
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'