mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge current state of oeb2mobi.
This commit is contained in:
commit
a8060652c8
@ -25,7 +25,8 @@ from calibre.ebooks.lit.reader import DirectoryEntry
|
||||
import calibre.ebooks.lit.maps as maps
|
||||
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \
|
||||
CSS_MIME, OPF_MIME, XML_NS, XML
|
||||
from calibre.ebooks.oeb.base import namespace, barename, urlnormalize, xpath
|
||||
from calibre.ebooks.oeb.base import namespace, barename, prefixname, \
|
||||
urlnormalize, xpath
|
||||
from calibre.ebooks.oeb.base import FauxLogger, OEBBook
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
from calibre.ebooks.lit.lzx import Compressor
|
||||
@ -116,12 +117,6 @@ LZXC_CONTROL = \
|
||||
|
||||
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
||||
|
||||
def prefixname(name, nsrmap):
    '''
    Return `name` in ``prefix:local`` form.

    The prefix is looked up in `nsrmap` under ``namespace(name)``; when the
    mapped prefix is empty or None only the bare local name is returned.
    Raises KeyError if the namespace has no entry in `nsrmap`.
    '''
    prefix = nsrmap[namespace(name)]
    local = barename(name)
    if prefix:
        return ':'.join((prefix, local))
    return local
|
||||
|
||||
def decint(value):
|
||||
bytes = []
|
||||
while True:
|
||||
|
@ -3,6 +3,8 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from struct import pack
|
||||
|
||||
main_language = {
|
||||
0 : "NEUTRAL",
|
||||
54 : "AFRIKAANS",
|
||||
@ -155,5 +157,168 @@ sub_language = {
|
||||
2 : "SWEDISH_FINLAND",
|
||||
1 : "UZBEK_LATIN",
|
||||
2 : "UZBEK_CYRILLIC",
|
||||
|
||||
}
|
||||
|
||||
IANA_MOBI = \
|
||||
{None: {None: (0, 0)},
|
||||
'af': {None: (54, 0)},
|
||||
'ar': {None: (1, 0),
|
||||
'AE': (1, 56),
|
||||
'BH': (1, 60),
|
||||
'DZ': (1, 20),
|
||||
'EG': (1, 12),
|
||||
'JO': (1, 44),
|
||||
'KW': (1, 52),
|
||||
'LB': (1, 48),
|
||||
'MA': (1, 24),
|
||||
'OM': (1, 32),
|
||||
'QA': (1, 64),
|
||||
'SA': (1, 4),
|
||||
'SY': (1, 40),
|
||||
'TN': (1, 28),
|
||||
'YE': (1, 36)},
|
||||
'as': {None: (77, 0)},
|
||||
'az': {None: (44, 0)},
|
||||
'be': {None: (35, 0)},
|
||||
'bg': {None: (2, 0)},
|
||||
'bn': {None: (69, 0)},
|
||||
'ca': {None: (3, 0)},
|
||||
'cs': {None: (5, 0)},
|
||||
'da': {None: (6, 0)},
|
||||
'de': {None: (7, 0),
|
||||
'AT': (7, 12),
|
||||
'CH': (7, 8),
|
||||
'LI': (7, 20),
|
||||
'LU': (7, 16)},
|
||||
'el': {None: (8, 0)},
|
||||
'en': {None: (9, 0),
|
||||
'AU': (9, 12),
|
||||
'BZ': (9, 40),
|
||||
'CA': (9, 16),
|
||||
'GB': (9, 8),
|
||||
'IE': (9, 24),
|
||||
'JM': (9, 32),
|
||||
'NZ': (9, 20),
|
||||
'PH': (9, 52),
|
||||
'TT': (9, 44),
|
||||
'US': (9, 4),
|
||||
'ZA': (9, 28),
|
||||
'ZW': (9, 48)},
|
||||
'es': {None: (10, 0),
|
||||
'AR': (10, 44),
|
||||
'BO': (10, 64),
|
||||
'CL': (10, 52),
|
||||
'CO': (10, 36),
|
||||
'CR': (10, 20),
|
||||
'DO': (10, 28),
|
||||
'EC': (10, 48),
|
||||
'ES': (10, 4),
|
||||
'GT': (10, 16),
|
||||
'HN': (10, 72),
|
||||
'MX': (10, 8),
|
||||
'NI': (10, 76),
|
||||
'PA': (10, 24),
|
||||
'PE': (10, 40),
|
||||
'PR': (10, 80),
|
||||
'PY': (10, 60),
|
||||
'SV': (10, 68),
|
||||
'UY': (10, 56),
|
||||
'VE': (10, 32)},
|
||||
'et': {None: (37, 0)},
|
||||
'eu': {None: (45, 0)},
|
||||
'fa': {None: (41, 0)},
|
||||
'fi': {None: (11, 0)},
|
||||
'fo': {None: (56, 0)},
|
||||
'fr': {None: (12, 0),
|
||||
'BE': (12, 8),
|
||||
'CA': (12, 12),
|
||||
'CH': (12, 16),
|
||||
'FR': (12, 4),
|
||||
'LU': (12, 20),
|
||||
'MC': (12, 24)},
|
||||
'gu': {None: (71, 0)},
|
||||
'he': {None: (13, 0)},
|
||||
'hi': {None: (57, 0)},
|
||||
'hr': {None: (26, 0)},
|
||||
'hu': {None: (14, 0)},
|
||||
'hy': {None: (43, 0)},
|
||||
'id': {None: (33, 0)},
|
||||
'is': {None: (15, 0)},
|
||||
'it': {None: (16, 0),
|
||||
'CH': (16, 8),
|
||||
'IT': (16, 4)},
|
||||
'ja': {None: (17, 0)},
|
||||
'ka': {None: (55, 0)},
|
||||
'kk': {None: (63, 0)},
|
||||
'kn': {None: (75, 0)},
|
||||
'ko': {None: (18, 0)},
|
||||
'kok': {None: (87, 0)},
|
||||
'lt': {None: (39, 0)},
|
||||
'lv': {None: (38, 0)},
|
||||
'mk': {None: (47, 0)},
|
||||
'ml': {None: (76, 0)},
|
||||
'mr': {None: (78, 0)},
|
||||
'ms': {None: (62, 0)},
|
||||
'mt': {None: (58, 0)},
|
||||
'ne': {None: (97, 0)},
|
||||
'nl': {None: (19, 0),
|
||||
'BE': (19, 8)},
|
||||
'no': {None: (20, 0)},
|
||||
'or': {None: (72, 0)},
|
||||
'pa': {None: (70, 0)},
|
||||
'pl': {None: (21, 0)},
|
||||
'pt': {None: (22, 0),
|
||||
'BR': (22, 4),
|
||||
'PT': (22, 8)},
|
||||
'rm': {None: (23, 0)},
|
||||
'ro': {None: (24, 0)},
|
||||
'ru': {None: (25, 0)},
|
||||
'sa': {None: (79, 0)},
|
||||
'se': {None: (59, 0)},
|
||||
'sk': {None: (27, 0)},
|
||||
'sl': {None: (36, 0)},
|
||||
'sq': {None: (28, 0)},
|
||||
'sr': {None: (26, 12),
|
||||
'RS': (26, 12)},
|
||||
'st': {None: (48, 0)},
|
||||
'sv': {None: (29, 0),
|
||||
'FI': (29, 8)},
|
||||
'sw': {None: (65, 0)},
|
||||
'ta': {None: (73, 0)},
|
||||
'te': {None: (74, 0)},
|
||||
'th': {None: (30, 0)},
|
||||
'tn': {None: (50, 0)},
|
||||
'tr': {None: (31, 0)},
|
||||
'ts': {None: (49, 0)},
|
||||
'tt': {None: (68, 0)},
|
||||
'uk': {None: (34, 0)},
|
||||
'ur': {None: (32, 0)},
|
||||
'uz': {None: (67, 0),
|
||||
'UZ': (67, 8)},
|
||||
'vi': {None: (42, 0)},
|
||||
'wen': {None: (46, 0)},
|
||||
'xh': {None: (52, 0)},
|
||||
'zh': {None: (4, 0),
|
||||
'CN': (4, 8),
|
||||
'HK': (4, 12),
|
||||
'SG': (4, 16),
|
||||
'TW': (4, 4)},
|
||||
'zu': {None: (53, 0)}}
|
||||
|
||||
def iana2mobi(icode):
    '''
    Convert an IANA language tag into a packed Mobipocket language code.

    `icode` is a hyphen-separated tag such as ``'en-US'``.  Leading subtags
    are consumed (case-insensitively) until one is a key of IANA_MOBI; the
    remaining subtags are then tried, as given and upper-cased, as region
    keys of that language's table.  Unknown, empty or None tags fall back to
    the neutral ``IANA_MOBI[None]`` entry.

    Returns the 4 bytes produced by ``pack('>HBB', 0, sub, main)`` where
    ``(main, sub)`` is the matching IANA_MOBI value.
    '''
    # Tolerate a missing language rather than failing on None.split().
    subtags = icode.split('-') if icode else []
    langdict = IANA_MOBI[None]
    while subtags:
        lang = subtags.pop(0).lower()
        if lang in IANA_MOBI:
            langdict = IANA_MOBI[lang]
            break
    # Default to the language-only code; refine it if a region subtag matches.
    mcode = langdict[None]
    while subtags:
        subtag = subtags.pop(0)
        if subtag not in langdict:
            subtag = subtag.upper()
        if subtag in langdict:
            mcode = langdict[subtag]
            break
    return pack('>HBB', 0, mcode[1], mcode[0])
|
||||
|
@ -2,7 +2,11 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
|
||||
'and Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
from cStringIO import StringIO
|
||||
from struct import pack
|
||||
|
||||
COUNT_BITS = 3
|
||||
|
||||
@ -32,3 +36,52 @@ def decompress_doc(data):
|
||||
|
||||
return ''.join([chr(i) for i in res])
|
||||
|
||||
def compress_doc(data):
    '''
    Compress the byte string `data` with PalmDoc-style compression.

    Three encodings are emitted besides plain literal bytes:

    * a two-byte back-reference (``0x8000 | distance << 3 | length - 3``)
      for a 3-10 byte repeat found at most 2047 bytes back;
    * a single byte ``char ^ 0x80`` for a space followed by a byte in
      0x40-0x7f;
    * a length byte (1-8) followed by that many raw bytes for bytes that
      cannot stand for themselves (0x01-0x08 and 0x80-0xff).

    Returns the compressed byte string.
    '''
    out = StringIO()
    i = 0
    ldata = len(data)
    while i < ldata:
        if i > 10 and (ldata - i) > 10:
            # Look for the longest earlier repeat, trying lengths 10 down to 3.
            match = -1
            for j in range(10, 2, -1):
                chunk = data[i:i+j]
                try:
                    match = data.rindex(chunk, 0, i)
                except ValueError:
                    continue
                if (i - match) <= 2047:
                    break
                # Too far back to encode; keep trying shorter chunks.
                match = -1
            if match >= 0:
                n = len(chunk)
                m = i - match
                code = 0x8000 + ((m << 3) & 0x3ff8) + (n - 3)
                out.write(pack('>H', code))
                i += n
                continue
        # Slice (rather than index) so `ch` is always a length-1 string.
        ch = data[i:i+1]
        och = ord(ch)
        i += 1
        if och == 0x20 and (i + 1) < ldata:
            onch = ord(data[i:i+1])
            if 0x40 <= onch < 0x80:
                # Fold "space + char" into one byte.
                out.write(pack('>B', onch ^ 0x80))
                i += 1
                continue
        if och == 0 or (9 <= och < 0x80):
            out.write(ch)
        else:
            # Gather a run of bytes that need the length-prefixed form.  The
            # scan must advance, stop at the end of the data, and stop at 8
            # bytes because the prefix byte only covers lengths 1-8.
            j = i
            while j < ldata and (j - i + 1) < 8:
                nch = ord(data[j:j+1])
                if nch == 0 or (8 < nch < 0x80):
                    break
                j += 1
            run = data[i-1:j]
            out.write(pack('>B', len(run)))
            out.write(run)
            i = j
    return out.getvalue()
|
||||
|
||||
|
356
src/calibre/ebooks/mobi/writer.py
Normal file
356
src/calibre/ebooks/mobi/writer.py
Normal file
@ -0,0 +1,356 @@
|
||||
'''
|
||||
Write content to Mobipocket books.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
|
||||
|
||||
import sys
|
||||
import os
|
||||
from struct import pack
|
||||
import functools
|
||||
import time
|
||||
import random
|
||||
from cStringIO import StringIO
|
||||
import re
|
||||
from itertools import izip, count
|
||||
from collections import defaultdict
|
||||
from urlparse import urldefrag
|
||||
from lxml import etree
|
||||
from PIL import Image
|
||||
from calibre.ebooks.mobi.palmdoc import compress_doc
|
||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||
from calibre.ebooks.lit.oeb import XML_NS, XHTML, XHTML_NS, OEB_DOCS
|
||||
from calibre.ebooks.lit.oeb import xpath, barename, namespace, prefixname
|
||||
from calibre.ebooks.lit.oeb import FauxLogger, OEBBook
|
||||
|
||||
MBP_NS = 'http://mobipocket.com/ns/mbp'


def MBP(name):
    '''Return `name` qualified with MBP_NS in ``{namespace}local`` form.'''
    qualified = '{%s}%s' % (MBP_NS, name)
    return qualified
|
||||
|
||||
# Metadata term name -> numeric EXTH record code.  MobiWriter._build_exth()
# writes one EXTH record per metadata item whose term appears here and
# skips every other term.
EXTH_CODES = {
    'creator': 100,
    'publisher': 101,
    'description': 103,
    'identifier': 104,
    'subject': 105,
    'date': 106,
    'review': 107,
    'contributor': 108,
    'rights': 109,
    'type': 111,
    'source': 112,
    'title': 503,
    }

# Compression identifiers; the selected one is written as the first field of
# record 0 by MobiWriter._generate_record0().  Only PALMDOC triggers
# compression in MobiWriter._generate_text().
# NOTE(review): HUFFDIC is not referenced by the code visible here.
UNCOMPRESSED = 1
PALMDOC = 2
HUFFDIC = 17480
|
||||
|
||||
def encode(data):
    '''
    Encode `data` as ASCII, substituting XML numeric character references
    (``&#NNN;``) for characters outside the ASCII range.
    '''
    ascii_data = data.encode('ascii', 'xmlcharrefreplace')
    return ascii_data
|
||||
|
||||
|
||||
|
||||
class Serializer(object):
    '''
    Flatten the spine of an OEB book into a single Mobipocket HTML stream.

    All work happens in the constructor; the result is available as
    ``self.raw`` and through ``str()``.  Hyperlinks are first written as
    fixed-width ``filepos=0000000000`` placeholders and are patched with the
    buffer offset of their target once everything has been written.
    '''
    # Namespace URI -> prefix used on output.  Elements and attributes whose
    # namespace is not a key of this map are not written.
    NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}

    def __init__(self, oeb, images):
        '''
        Serialize `oeb`.  `images` maps image hrefs to the record index
        written into ``recindex`` attributes.
        '''
        self.oeb = oeb
        self.images = images
        # 'item-id#fragment' -> buffer offset of that anchor.
        self.id_offsets = {}
        # 'item-id#fragment' -> offsets of the placeholders pointing at it.
        self.href_offsets = defaultdict(list)
        buffer = self.buffer = StringIO()
        buffer.write('<html>')
        self.serialize_head()
        self.serialize_body()
        buffer.write('</html>')
        self.fixup_links()
        self.raw = buffer.getvalue()

    def __str__(self):
        return self.raw

    def _escape(self, text, quot=False):
        '''
        Return `text` escaped for markup and encoded to ASCII.

        The parsed tree holds unescaped character data, so ``&``, ``<`` and
        ``>`` must be turned back into entities; `quot` additionally escapes
        double quotes for use inside attribute values.
        '''
        text = text.replace('&', '&amp;')
        text = text.replace('<', '&lt;')
        text = text.replace('>', '&gt;')
        if quot:
            text = text.replace('"', '&quot;')
        return encode(text)

    def serialize_head(self):
        '''Write the <head> element, including the guide when one exists.'''
        buffer = self.buffer
        buffer.write('<head>')
        if len(self.oeb.guide) > 0:
            self.serialize_guide()
        buffer.write('</head>')

    def serialize_guide(self):
        '''Write one <reference> element per guide entry.'''
        buffer = self.buffer
        buffer.write('<guide>')
        for ref in self.oeb.guide.values():
            buffer.write('<reference ')
            # A missing title is omitted instead of being written as "None".
            if ref.title is not None:
                buffer.write('title="%s" '
                             % self._escape(ref.title, quot=True))
            buffer.write('type="%s" ' % self._escape(ref.type, quot=True))
            self.serialize_href(ref.href)
            buffer.write('/>')
        buffer.write('</guide>')

    def serialize_href(self, href, base=None):
        '''
        Write a ``filepos=`` placeholder for a link to `href`.

        `base` is the item containing the link; it is used to resolve
        relative paths and as the target document for fragment-only links.
        Returns True when a placeholder was written and False when the link
        does not point at a spine item (nothing is written in that case).
        '''
        hrefs = self.oeb.manifest.hrefs
        path, frag = urldefrag(href)
        if path and base:
            path = base.abshref(path)
        if path and path not in hrefs:
            return False
        buffer = self.buffer
        item = hrefs[path] if path else None
        if item and item.spine_position is None:
            return False
        target = item if item else base
        if target is None:
            # Fragment-only link with no containing document to refer to.
            return False
        frag = frag if frag else 'calibre_top'
        href = '#'.join((target.id, frag))
        buffer.write('filepos=')
        self.href_offsets[href].append(buffer.tell())
        buffer.write('0000000000')
        return True

    def serialize_body(self):
        '''Write the <body> element containing every spine item in order.'''
        buffer = self.buffer
        buffer.write('<body>')
        for item in self.oeb.spine:
            self.serialize_item(item)
        buffer.write('</body>')

    def serialize_item(self, item):
        '''Write a page break followed by the body content of `item`.'''
        buffer = self.buffer
        buffer.write('<mbp:pagebreak/>')
        # TODO: Figure out how to make the 'crossable' stuff work for
        # non-"linear" spine items.
        self.id_offsets[item.id + '#calibre_top'] = buffer.tell()
        for elem in item.data.find(XHTML('body')):
            self.serialize_elem(elem, item)

    def serialize_elem(self, elem, item, nsrmap=NSRMAP):
        '''
        Write `elem` (belonging to spine item `item`), its children and its
        tail text.

        ``name``/``id`` attributes are removed from the element and recorded
        as link targets, ``href`` attributes become ``filepos`` placeholders
        when they resolve to spine content, and ``src`` attributes that
        resolve to a known image become ``recindex`` references.
        '''
        buffer = self.buffer
        if not isinstance(elem.tag, basestring):
            # lxml comments and processing instructions carry a factory
            # function in .tag rather than a name.  NOTE(review): assumes
            # namespace() only accepts strings -- confirm.  The node itself
            # is dropped but the text following it is kept.
            if elem.tail:
                buffer.write(self._escape(elem.tail))
            return
        if namespace(elem.tag) not in nsrmap:
            return
        tag = prefixname(elem.tag, nsrmap)
        for attr in ('name', 'id'):
            if attr in elem.attrib:
                anchor = '#'.join((item.id, elem.attrib[attr]))
                self.id_offsets[anchor] = buffer.tell()
                del elem.attrib[attr]
        buffer.write('<')
        buffer.write(tag)
        if elem.attrib:
            for attr, val in elem.attrib.items():
                if namespace(attr) not in nsrmap:
                    continue
                attr = prefixname(attr, nsrmap)
                buffer.write(' ')
                if attr == 'href':
                    if self.serialize_href(val, item):
                        continue
                elif attr == 'src':
                    # Resolve against the containing document, as
                    # serialize_href() does, and only rewrite real images.
                    src = item.abshref(val)
                    if src in self.images:
                        buffer.write('recindex="%05d"' % self.images[src])
                        continue
                buffer.write('%s="' % attr)
                buffer.write(self._escape(val, quot=True))
                buffer.write('"')
        if elem.text or len(elem) > 0:
            buffer.write('>')
            if elem.text:
                buffer.write(self._escape(elem.text))
            for child in elem:
                self.serialize_elem(child, item, nsrmap)
            buffer.write('</%s>' % tag)
        else:
            buffer.write('/>')
        if elem.tail:
            buffer.write(self._escape(elem.tail))

    def fixup_links(self):
        '''
        Overwrite every ``filepos`` placeholder with its target's offset.

        A link whose anchor was never seen falls back to the top of the
        target document; if that is unknown too the placeholder is left as
        zeros instead of aborting the whole serialization.
        '''
        buffer = self.buffer
        id_offsets = self.id_offsets
        for href, hoffs in self.href_offsets.items():
            ioff = id_offsets.get(href)
            if ioff is None:
                top = href.split('#', 1)[0] + '#calibre_top'
                ioff = id_offsets.get(top)
            if ioff is None:
                continue
            for hoff in hoffs:
                buffer.seek(hoff)
                buffer.write('%010d' % ioff)
|
||||
|
||||
|
||||
class MobiWriter(object):
    '''
    Write an OEB book out as a Mobipocket file.

    dump() builds a list of records (record 0 header, text records, image
    records), then writes the database header with the record offset table
    followed by the records themselves.
    '''

    def __init__(self, compress=None, logger=None):
        '''
        `compress` is UNCOMPRESSED (the default) or PALMDOC.  `logger`
        defaults to a fresh FauxLogger.
        '''
        self._compress = compress or UNCOMPRESSED
        # Create the default per instance rather than once at definition time.
        self._logger = logger if logger is not None else FauxLogger()

    def dump(self, oeb, path):
        '''
        Write `oeb` to `path`, which is either a filesystem path or an
        object with a ``write`` method.
        '''
        if hasattr(path, 'write'):
            return self._dump_stream(oeb, path)
        with open(path, 'w+b') as stream:
            return self._dump_stream(oeb, stream)

    def _write(self, *data):
        '''Write each item of `data` to the output stream, in order.'''
        for datum in data:
            self._stream.write(datum)

    def _tell(self):
        '''Return the current position of the output stream.'''
        return self._stream.tell()

    def _dump_stream(self, oeb, stream):
        '''Generate all records for `oeb` and write them to `stream`.'''
        self._oeb = oeb
        self._stream = stream
        # Slot 0 is reserved for the header record built last.
        self._records = [None]
        self._generate_content()
        self._generate_record0()
        self._write_header()
        self._write_content()

    def _generate_content(self):
        '''Build the text records followed by the image records.'''
        self._map_image_names()
        self._generate_text()
        self._generate_images()

    def _map_image_names(self):
        '''Assign consecutive 1-based indexes to the manifest's image hrefs.'''
        index = 1
        self._images = images = {}
        for item in self._oeb.manifest.values():
            if item.media_type.startswith('image/'):
                images[item.href] = index
                index += 1

    def _generate_text(self):
        '''
        Serialize the book and split the result into 0x1000-byte text
        records, PalmDoc-compressing each one when requested.
        '''
        serializer = Serializer(self._oeb, self._images)
        text = str(serializer)
        self._text_length = len(text)
        text = StringIO(text)
        nrecords = 0
        data = text.read(0x1000)
        while len(data) > 0:
            nrecords += 1
            if self._compress == PALMDOC:
                data = compress_doc(data)
            # Without the NUL Mobipocket Desktop 6.2 will thrash.  Why?
            self._records.append(data + '\0')
            data = text.read(0x1000)
        self._text_nrecords = nrecords

    def _rescale_image(self, data, maxsizeb, dimen=None):
        '''
        Return image `data` reduced to at most `maxsizeb` bytes if possible.

        When `dimen` is given the image is first shrunk to fit inside it.
        Data that is still too large is re-encoded as JPEG at decreasing
        quality; the lowest-quality result is returned even if it is still
        over the limit.
        '''
        if dimen is not None:
            image = Image.open(StringIO(data))
            image.thumbnail(dimen, Image.ANTIALIAS)
            data = StringIO()
            image.save(data, image.format)
            data = data.getvalue()
        if len(data) < maxsizeb:
            return data
        image = Image.open(StringIO(data))
        if image.mode not in ('RGB', 'L', 'CMYK'):
            # Palette and alpha modes (typical of GIF/PNG) cannot be written
            # as JPEG directly.
            image = image.convert('RGB')
        for quality in xrange(95, -1, -1):
            data = StringIO()
            image.save(data, 'JPEG', quality=quality)
            data = data.getvalue()
            if len(data) <= maxsizeb:
                break
        return data

    def _generate_images(self):
        '''
        Append one record per mapped image, in index order.  The cover image
        may be up to 89 KiB, any other image up to 63 KiB.
        '''
        images = [(index, href) for href, index in self._images.items()]
        images.sort()
        metadata = self._oeb.metadata
        # Compare ids as strings, the same way _build_exth() resolves the
        # cover, so the cover actually receives the larger size limit.
        coverid = str(metadata.cover[0]) if metadata.cover else None
        for _, href in images:
            item = self._oeb.manifest.hrefs[href]
            maxsizek = 89 if coverid == item.id else 63
            maxsizeb = maxsizek * 1024
            data = self._rescale_image(item.data, maxsizeb)
            self._records.append(data)

    def _generate_record0(self):
        '''
        Build record 0: the compression/length fields, the 'MOBI' header,
        the EXTH block and the title, NUL-padded up to 2452 bytes.
        '''
        metadata = self._oeb.metadata
        exth = self._build_exth()
        record0 = StringIO()
        record0.write(pack('>HHIHHHH', self._compress, 0, self._text_length,
            self._text_nrecords, 0x1000, 0, 0))
        uid = random.randint(0, 0xffffffff)
        title = str(metadata.title[0])
        record0.write('MOBI')
        record0.write(pack('>IIIII', 0xe8, 2, 65001, uid, 5))
        record0.write('\xff' * 40)
        record0.write(pack('>I', self._text_nrecords + 1))
        # Title offset (just past the EXTH block) and title length.
        record0.write(pack('>II', 0xe8 + 16 + len(exth), len(title)))
        record0.write(iana2mobi(str(metadata.language[0])))
        record0.write('\0' * 8)
        record0.write(pack('>II', 5, self._text_nrecords + 1))
        record0.write('\0' * 16)
        record0.write(pack('>I', 0x50))
        record0.write('\0' * 32)
        record0.write(pack('>IIII', 0xffffffff, 0xffffffff, 0, 0))
        # TODO: What the hell are these fields?
        record0.write(pack('>IIIIIIIIIIIIIIIII',
            0, 0, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff, 0, 0xffffffff,
            0, 0xffffffff, 0, 0xffffffff, 0xffffffff, 1, 0xffffffff))
        record0.write(exth)
        record0.write(title)
        record0 = record0.getvalue()
        self._records[0] = record0 + ('\0' * (2452 - len(record0)))

    def _build_exth(self):
        '''
        Return the EXTH block: one record per metadata item whose term is in
        EXTH_CODES, plus three cover/thumbnail records when the book has a
        cover.  The block is NUL-padded to a multiple of four bytes.
        '''
        oeb = self._oeb
        exth = StringIO()
        nrecs = 0
        for term in oeb.metadata:
            if term not in EXTH_CODES:
                continue
            code = EXTH_CODES[term]
            for item in oeb.metadata[term]:
                data = str(item)
                # Record length covers the 8-byte code/length prefix too.
                exth.write(pack('>II', code, len(data) + 8))
                exth.write(data)
                nrecs += 1
        if oeb.metadata.cover:
            coverid = str(oeb.metadata.cover[0])
            item = oeb.manifest[coverid]
            href = item.href
            # Image indexes are stored zero-based here.
            index = self._images[href] - 1
            exth.write(pack('>III', 0xc9, 0x0c, index))
            exth.write(pack('>III', 0xcb, 0x0c, 0))
            index = self._add_thumbnail(item) - 1
            exth.write(pack('>III', 0xca, 0x0c, index))
            nrecs += 3
        exth = exth.getvalue()
        trail = len(exth) % 4
        pad = '' if not trail else '\0' * (4 - trail)
        exth = ['EXTH', pack('>II', len(exth) + 12, nrecs), exth, pad]
        return ''.join(exth)

    def _add_thumbnail(self, item):
        '''
        Create a thumbnail (at most 180x240 and 16 KiB) of `item`, add it to
        the manifest, the image map and the records, and return its 1-based
        image index.
        '''
        maxsizeb = 16 * 1024
        dimen = (180, 240)
        data = self._rescale_image(item.data, maxsizeb, dimen)
        manifest = self._oeb.manifest
        thumbid, href = manifest.generate('thumbnail', 'thumbnail.jpeg')
        manifest.add(thumbid, href, 'image/jpeg', data=data)
        index = len(self._images) + 1
        self._images[href] = index
        self._records.append(data)
        return index

    def _write_header(self):
        '''
        Write the database header: a 32-byte sanitized title, timestamps,
        the 'BOOK'/'MOBI' markers and one 8-byte entry per record (4-byte
        offset, a zero byte, 3-byte record number), then two NUL bytes.
        '''
        title = str(self._oeb.metadata.title[0])
        title = re.sub('[^-A-Za-z0-9]+', '_', title)[:32]
        title = title + ('\0' * (32 - len(title)))
        now = int(time.time())
        nrecords = len(self._records)
        self._write(title, pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0),
            'BOOK', 'MOBI', pack('>IIH', nrecords, 0, nrecords))
        # First record starts after the offset table and the 2-byte gap.
        offset = self._tell() + (8 * nrecords) + 2
        for index, record in enumerate(self._records):
            self._write(pack('>I', offset), '\0', pack('>I', index)[1:])
            offset += len(record)
        self._write('\0\0')

    def _write_content(self):
        '''Write every record, in order, to the output stream.'''
        for record in self._records:
            self._write(record)
|
||||
|
||||
|
||||
def main(argv=sys.argv):
    '''
    Command-line entry point: convert the OEB book at ``argv[1]`` into a
    Mobipocket file written to ``argv[2]``.

    Returns 0 on success, or 1 (after printing a usage line to stderr) when
    the number of arguments is wrong.
    '''
    args = argv[1:]
    if len(args) != 2:
        prog = argv[0] if argv else 'writer'
        sys.stderr.write('Usage: %s INPATH OUTPATH\n' % prog)
        return 1
    inpath, outpath = args
    oeb = OEBBook(inpath)
    writer = MobiWriter()
    writer.dump(oeb, outpath)
    return 0


if __name__ == '__main__':
    sys.exit(main())
|
@ -50,6 +50,8 @@ OPENTYPE_MIME = 'font/opentype'
|
||||
OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css'])
|
||||
OEB_DOCS = set([XHTML_MIME, 'text/html', OEB_DOC_MIME, 'text/x-oeb-document'])
|
||||
|
||||
MS_COVER_TYPE = 'other.ms-coverimage-standard'
|
||||
|
||||
|
||||
def element(parent, *args, **kwargs):
|
||||
if parent is not None:
|
||||
@ -66,6 +68,12 @@ def barename(name):
|
||||
return name.split('}', 1)[1]
|
||||
return name
|
||||
|
||||
def prefixname(name, nsrmap):
    '''
    Return `name` rewritten as ``prefix:local``, or as just the bare local
    name when `nsrmap` maps its namespace to an empty/None prefix.

    Raises KeyError when ``namespace(name)`` is not a key of `nsrmap`.
    '''
    prefix = nsrmap[namespace(name)]
    local = barename(name)
    return ':'.join((prefix, local)) if prefix else local
|
||||
|
||||
def xpath(elem, expr):
    '''Evaluate `expr` on `elem`, with XPNSMAP supplying namespace prefixes.'''
    matches = elem.xpath(expr, namespaces=XPNSMAP)
    return matches
|
||||
|
||||
@ -147,6 +155,7 @@ class Metadata(object):
|
||||
TERMS = set(['contributor', 'coverage', 'creator', 'date', 'description',
|
||||
'format', 'identifier', 'language', 'publisher', 'relation',
|
||||
'rights', 'source', 'subject', 'title', 'type'])
|
||||
ATTRS = set(['role', 'file-as', 'scheme'])
|
||||
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
|
||||
OPF2_NSMAP = {'opf': OPF2_NS, 'dc': DC11_NS, 'dcterms': DCTERMS_NS,
|
||||
'xsi': XSI_NS}
|
||||
@ -163,6 +172,11 @@ class Metadata(object):
|
||||
self.value = value
|
||||
self.attrib = attrib = {}
|
||||
for fq_attr in fq_attrib:
|
||||
if fq_attr in Metadata.ATTRS:
|
||||
attr = fq_attr
|
||||
fq_attr = OPF2(fq_attr)
|
||||
fq_attrib[fq_attr] = fq_attrib.pop(attr)
|
||||
else:
|
||||
attr = barename(fq_attr)
|
||||
attrib[attr] = fq_attrib[fq_attr]
|
||||
|
||||
@ -180,7 +194,7 @@ class Metadata(object):
|
||||
% (barename(self.term), self.value, self.attrib)
|
||||
|
||||
def __str__(self):
    '''
    Return the value as an ASCII byte string, with non-ASCII characters
    replaced by XML numeric character references.
    '''
    # The earlier ``return str(self.value)`` made this return unreachable
    # and fails on non-ASCII unicode values.
    return unicode(self.value).encode('ascii', 'xmlcharrefreplace')
|
||||
|
||||
def __unicode__(self):
    '''Return the value converted with ``unicode()``.'''
    text = unicode(self.value)
    return text
|
||||
@ -318,6 +332,14 @@ class Manifest(object):
|
||||
relhref = '#'.join((relhref, frag))
|
||||
return relhref
|
||||
|
||||
def abshref(self, href):
    '''
    Resolve `href`, given relative to this item, against the directory part
    of this item's own href.

    When this item's href has no directory component, `href` is returned
    unchanged.  Otherwise the joined path is normalized and returned with
    forward slashes.
    '''
    own = self.href
    if '/' in own:
        joined = os.path.join(os.path.dirname(own), href)
        return os.path.normpath(joined).replace('\\', '/')
    return href
|
||||
|
||||
def __init__(self, oeb):
|
||||
self.oeb = oeb
|
||||
self.ids = {}
|
||||
@ -503,6 +525,9 @@ class Guide(object):
|
||||
def __contains__(self, key):
    '''Return True when `key` is one of the keys of ``self.refs``.'''
    refs = self.refs
    return key in refs
|
||||
|
||||
def __len__(self):
    '''Return the number of entries in ``self.refs``.'''
    count = len(self.refs)
    return count
|
||||
|
||||
def to_opf1(self, parent=None):
|
||||
elem = element(parent, 'guide')
|
||||
for ref in self.refs.values():
|
||||
@ -652,6 +677,15 @@ class OEBBook(object):
|
||||
else:
|
||||
self.logger.log_warn(u'Unique-identifier %r not found.' % uid)
|
||||
self.uid = metadata.identifier[0]
|
||||
if not metadata.language:
|
||||
self.logger.log_warn(u'Language not specified.')
|
||||
metadata.add('language', 'en')
|
||||
if not metadata.creator:
|
||||
self.logger.log_warn(u'Creator not specified.')
|
||||
metadata.add('creator', 'Unknown')
|
||||
if not metadata.title:
|
||||
self.logger.log_warn(u'Title not specified.')
|
||||
metadata.add('title', 'Unknown')
|
||||
|
||||
def _manifest_from_opf(self, opf):
|
||||
self.manifest = manifest = Manifest(self)
|
||||
@ -790,12 +824,32 @@ class OEBBook(object):
|
||||
if self._toc_from_html(opf): return
|
||||
self._toc_from_spine(opf)
|
||||
|
||||
def _ensure_cover_image(self):
    '''
    Make sure the metadata names a cover image when one can be found.

    An existing ``cover`` metadata entry is only checked against the
    manifest (KeyError if its id is unknown).  Otherwise candidates are
    tried in this order: the guide's MS_COVER_TYPE reference, the guide's
    ``cover`` reference, and the first <img> of the first spine document.
    A candidate is registered only if it is in the manifest and is an
    image; a missing or unusable candidate leaves the book without a cover
    instead of raising.
    '''
    if self.metadata.cover:
        coverid = str(self.metadata.cover[0])
        # Kept as a sanity check: fails loudly on a dangling cover id.
        self.manifest[coverid]
        return
    href = None
    if MS_COVER_TYPE in self.guide:
        href = self.guide[MS_COVER_TYPE].href
    elif 'cover' in self.guide:
        href = self.guide['cover'].href
    else:
        try:
            first = self.spine[0]
        except IndexError:
            # Empty spine: nothing to search for an image.
            first = None
        if first is not None:
            imgs = xpath(first.data, '//h:img[position()=1]')
            src = imgs[0].get('src') if imgs else None
            if src:
                # src is relative to the document that contains it.
                href = first.abshref(src)
    hrefs = self.manifest.hrefs
    cover = hrefs[href] if href in hrefs else None
    # Only images can be used as a cover by the writers (they look the
    # cover up among the image records).
    if cover is not None and cover.media_type.startswith('image/'):
        self.metadata.add('cover', cover.id)
|
||||
|
||||
def _all_from_opf(self, opf):
    '''
    Populate the book from `opf`: metadata, manifest, spine, guide and TOC
    are loaded in that order, then a cover image is ensured.
    '''
    loaders = (
        self._metadata_from_opf,
        self._manifest_from_opf,
        self._spine_from_opf,
        self._guide_from_opf,
        self._toc_from_opf,
    )
    for load in loaders:
        load(opf)
    self._ensure_cover_image()
|
||||
|
||||
def to_opf1(self):
|
||||
package = etree.Element('package',
|
||||
@ -859,6 +913,7 @@ class OEBBook(object):
|
||||
NCX_MIME: (href, ncx)}
|
||||
|
||||
|
||||
|
||||
def main(argv=sys.argv):
|
||||
for arg in argv[1:]:
|
||||
oeb = OEBBook(arg)
|
||||
|
Loading…
x
Reference in New Issue
Block a user