mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
RocketBook (rb) input.
This commit is contained in:
parent
9b890e279d
commit
503b697653
@ -321,6 +321,7 @@ from calibre.ebooks.lit.input import LITInput
|
|||||||
from calibre.ebooks.fb2.input import FB2Input
|
from calibre.ebooks.fb2.input import FB2Input
|
||||||
from calibre.ebooks.fb2.output import FB2Output
|
from calibre.ebooks.fb2.output import FB2Output
|
||||||
from calibre.ebooks.odt.input import ODTInput
|
from calibre.ebooks.odt.input import ODTInput
|
||||||
|
from calibre.ebooks.rb.input import RBInput
|
||||||
from calibre.ebooks.rtf.input import RTFInput
|
from calibre.ebooks.rtf.input import RTFInput
|
||||||
from calibre.ebooks.html.input import HTMLInput
|
from calibre.ebooks.html.input import HTMLInput
|
||||||
from calibre.ebooks.comic.input import ComicInput
|
from calibre.ebooks.comic.input import ComicInput
|
||||||
@ -351,7 +352,7 @@ from calibre.devices.bebook.driver import BEBOOK, BEBOOK_MINI
|
|||||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
|
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
|
||||||
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
|
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
|
||||||
FB2Input, FB2Output, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
|
FB2Input, FB2Output, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
|
||||||
PMLOutput, MOBIOutput, PDBOutput, LRFOutput, LITOutput]
|
PMLOutput, MOBIOutput, PDBOutput, LRFOutput, LITOutput, RBInput]
|
||||||
plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
|
plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
|
||||||
EB600, JETBOOK, BEBOOK, BEBOOK_MINI]
|
EB600, JETBOOK, BEBOOK, BEBOOK_MINI]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
|
11
src/calibre/ebooks/rb/__init__.py
Normal file
11
src/calibre/ebooks/rb/__init__.py
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
# Magic byte sequence expected at the very start of a RocketBook (.rb) file.
# It is 14 bytes long; the reader compares the first 14 bytes of the stream
# against this value to decide whether the file is a valid RocketBook.
HEADER = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'
|
||||||
|
|
||||||
|
class RocketBookError(Exception):
    '''Raised when a RocketBook (.rb) file cannot be read or is invalid.'''
|
||||||
|
|
24
src/calibre/ebooks/rb/input.py
Normal file
24
src/calibre/ebooks/rb/input.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from calibre.ebooks.rb.reader import Reader
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
|
||||||
|
class RBInput(InputFormatPlugin):
    '''
    Conversion input plugin for RocketBook (.rb) files.

    The actual parsing is delegated to
    :class:`calibre.ebooks.rb.reader.Reader`; this class only wires the
    reader into the plugin interface.
    '''

    name        = 'RB Input'
    author      = 'John Schember'
    description = 'Convert RB files to HTML'
    file_types  = set(['rb'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Unpack the RocketBook in `stream` into the current working directory.

        :param stream: Open file-like object containing the .rb data.
        :param options: Conversion options; only ``input_encoding`` is read.
        :param file_ext: Unused here.
        :param log: Logger handed through to the reader.
        :param accelerators: Unused here.
        :return: Whatever ``Reader.extract_content`` returns (the path of
                 the generated OPF file).
        '''
        # The reader is built first (it validates the file), then the
        # content is extracted into the process' current directory.
        return Reader(stream, log,
                options.input_encoding).extract_content(os.getcwd())
|
131
src/calibre/ebooks/rb/reader.py
Normal file
131
src/calibre/ebooks/rb/reader.py
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
import os.path
|
||||||
|
import zlib
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
import struct
|
||||||
|
from urllib import unquote as urlunquote
|
||||||
|
|
||||||
|
from calibre import CurrentDir
|
||||||
|
from calibre.ebooks.rb import HEADER
|
||||||
|
from calibre.ebooks.rb import RocketBookError
|
||||||
|
from calibre.ebooks.metadata.rb import get_metadata
|
||||||
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
|
|
||||||
|
class RBToc(list):
    '''
    Table of contents of a RocketBook file: a plain list whose elements
    are :class:`RBToc.Item` instances.
    '''

    class Item(object):
        '''
        A single table of contents entry.

        :param name: Name of the entry (used as the output file name).
        :param size: Size value recorded for the entry.
        :param offset: Offset value recorded for the entry.
        :param flags: Flags value recorded for the entry.
        '''

        def __init__(self, name='', size=0, offset=0, flags=0):
            self.name, self.size = name, size
            self.offset, self.flags = offset, flags
|
||||||
|
|
||||||
|
|
||||||
|
class Reader(object):
    '''
    Reader for RocketBook (.rb) container files.

    On construction the file header is verified, the metadata is read and
    the table of contents is parsed. :meth:`extract_content` then writes
    the HTML pages and PNG images found in the table of contents to a
    directory and generates an OPF file describing them.

    :param stream: Seekable binary file-like object with the .rb data.
    :param log: Logger object; stored on the instance.
    :param encoding: Encoding of the text pages. ``None`` means cp1252.
    '''

    def __init__(self, stream, log, encoding=None):
        self.stream = stream
        self.log = log
        self.encoding = encoding

        # Validate before anything else touches the stream.
        self.verify_file()

        self.mi = get_metadata(self.stream)
        self.toc = self.get_toc()

    def read_i32(self):
        '''
        Read the next 4 bytes of the stream as a little-endian unsigned
        32-bit integer and return it.
        '''
        return struct.unpack('<I', self.stream.read(4))[0]

    def verify_file(self):
        '''
        Check that the stream looks like an intact RocketBook file.

        :raises RocketBookError: If the first 14 bytes are not the
            RocketBook header, or if the file size stored at offset 28
            differs from the real size of the stream.
        '''
        self.stream.seek(0)
        if self.stream.read(14) != HEADER:
            raise RocketBookError('Could not read file: %s. Does not contain a valid RocketBook Header.' % self.stream.name)

        # The header stores the total file size at offset 28.
        self.stream.seek(28)
        size = self.read_i32()
        self.stream.seek(0, os.SEEK_END)
        real_size = self.stream.tell()
        if size != real_size:
            raise RocketBookError('File is corrupt. The file size recorded in the header does not match the actual file size.')

    def get_toc(self):
        '''
        Parse the table of contents.

        The offset of the table is stored at offset 24 of the file. The
        table starts with an entry count, followed by one record per
        entry: a 32 byte NUL padded, URL-quoted name and three 32-bit
        integers (size, offset, flags).

        :return: An :class:`RBToc` with one :class:`RBToc.Item` per entry.
        '''
        self.stream.seek(24)
        toc_offset = self.read_i32()

        self.stream.seek(toc_offset)
        pages = self.read_i32()

        toc = RBToc()
        for i in range(pages):
            # Strip with a bytes literal so this works on the raw bytes
            # returned by a binary stream.
            name = urlunquote(self.stream.read(32).strip(b'\x00'))
            size, offset, flags = self.read_i32(), self.read_i32(), self.read_i32()
            toc.append(RBToc.Item(name=name, size=size, offset=offset, flags=flags))

        return toc

    def _output_path(self, output_dir, name):
        '''
        Return the absolute path for the entry `name` inside `output_dir`.

        Entry names come from the (untrusted) input file, so names that
        would resolve to a location outside of `output_dir` (for example
        ``../x`` or an absolute path) are rejected.

        :raises RocketBookError: If the resulting path is not inside
            `output_dir`.
        '''
        # join(base, '') guarantees exactly one trailing separator.
        base = os.path.join(os.path.abspath(output_dir), '')
        path = os.path.abspath(os.path.join(base, name))
        if not path.startswith(base):
            raise RocketBookError('Refusing to extract %r: the path is outside of the output directory.' % name)
        return path

    def get_text(self, toc_item, output_dir):
        '''
        Decompress a text entry and write it, encoded as UTF-8, to
        `output_dir` under the entry's name.

        Only entries whose flags equal 8 are handled; anything else is
        skipped. The entry data consists of a chunk count, the
        uncompressed size, one compressed size per chunk and then the
        zlib compressed chunks themselves.

        :param toc_item: The :class:`RBToc.Item` to extract.
        :param output_dir: Directory the file is written to.
        :return: ``True`` if the file was written, ``False`` if the entry
                 was skipped.
        :raises RocketBookError: If the entry name points outside of
            `output_dir`.
        '''
        if toc_item.flags != 8:
            return False

        path = self._output_path(output_dir, toc_item.name)

        self.stream.seek(toc_item.offset)
        count = self.read_i32()
        self.read_i32()  # Uncompressed size.
        chunk_sizes = [self.read_i32() for i in range(count)]

        # Join the raw bytes first and decode once: a multi-byte character
        # may be split across two chunks, so decoding chunk by chunk can
        # fail or corrupt the text.
        raw = b''.join([zlib.decompress(self.stream.read(size))
            for size in chunk_sizes])
        output = raw.decode('cp1252' if self.encoding is None else self.encoding)

        with open(path, 'wb') as html:
            html.write(output.encode('utf-8'))

        return True

    def get_image(self, toc_item, output_dir):
        '''
        Copy an image entry verbatim to `output_dir` under the entry's
        name.

        Only entries whose flags equal 0 are handled; anything else is
        skipped.

        :param toc_item: The :class:`RBToc.Item` to extract.
        :param output_dir: Directory the file is written to.
        :return: ``True`` if the file was written, ``False`` if the entry
                 was skipped.
        :raises RocketBookError: If the entry name points outside of
            `output_dir`.
        '''
        if toc_item.flags != 0:
            return False

        path = self._output_path(output_dir, toc_item.name)

        self.stream.seek(toc_item.offset)
        data = self.stream.read(toc_item.size)

        with open(path, 'wb') as img:
            img.write(data)

        return True

    def extract_content(self, output_dir):
        '''
        Extract all HTML pages and PNG images to `output_dir` and create
        the OPF file for them.

        Entries are selected by name suffix (``html`` / ``png``, case
        insensitive). Only entries that were actually written are listed
        in the OPF, so the manifest never references a missing file.

        :param output_dir: Directory everything is written to.
        :return: Path of the generated OPF file.
        '''
        html = []
        images = []

        for item in self.toc:
            lname = item.name.lower()
            if lname.endswith('html') and self.get_text(item, output_dir):
                html.append(item.name)
            if lname.endswith('png') and self.get_image(item, output_dir):
                images.append(item.name)

        return self.create_opf(output_dir, html, images)

    def create_opf(self, output_dir, pages, images):
        '''
        Write ``metadata.opf`` into `output_dir`.

        The manifest lists all pages followed by all images; the spine
        lists the pages only.

        :param output_dir: Directory the OPF file is written to.
        :param pages: List of HTML file names, in reading order.
        :param images: List of image file names.
        :return: Path of the written OPF file.
        '''
        with CurrentDir(output_dir):
            opf = OPFCreator(output_dir, self.mi)

            # Each manifest entry is a (name, None) pair, as before.
            manifest = [(page, None) for page in pages + images]

            opf.create_manifest(manifest)
            opf.create_spine(pages)
            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')
|
@ -1,5 +1,4 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user