mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement conversion of non-DRMed MOBI/PRC files to OEBPS ebooks. Handles all compression methods.
This commit is contained in:
parent
97fdac5ef9
commit
782b195531
1
setup.py
1
setup.py
@ -44,6 +44,7 @@ entry_points = {
|
||||
'pdfreflow = libprs500.ebooks.lrf.pdf.reflow:main',
|
||||
'isbndb = libprs500.ebooks.metadata.isbndb:main',
|
||||
'librarything = libprs500.ebooks.metadata.library_thing:main',
|
||||
'mobi2oeb = libprs500.ebooks.mobi.reader:main',
|
||||
'lrf2html = libprs500.ebooks.lrf.html.convert_to:main',
|
||||
],
|
||||
'gui_scripts' : [
|
||||
|
@ -177,6 +177,7 @@ class OPF(MetaInformation):
|
||||
MIMETYPE = 'application/oebps-package+xml'
|
||||
ENTITY_PATTERN = re.compile(r'&(\S+?);')
|
||||
|
||||
uid = standard_field('uid')
|
||||
libprs_id = standard_field('libprs_id')
|
||||
title = standard_field('title')
|
||||
authors = standard_field('authors')
|
||||
@ -239,10 +240,11 @@ class OPF(MetaInformation):
|
||||
|
||||
dcms = metadata.getElementsByTagName(type)
|
||||
if dcms:
|
||||
dcm = dcms[0]
|
||||
dcm = dcms[0]
|
||||
else:
|
||||
dcm = doc.createElement(type)
|
||||
metadata.appendChild(dcm)
|
||||
metadata.appendChild(doc.createTextNode('\n'))
|
||||
tags = dcm.getElementsByTagName(name)
|
||||
if tags and not replace:
|
||||
for tag in tags:
|
||||
@ -260,6 +262,7 @@ class OPF(MetaInformation):
|
||||
for attr, vattr in vattrs:
|
||||
el.setAttribute(attr, vattr)
|
||||
dcm.appendChild(el)
|
||||
dcm.appendChild(doc.createTextNode('\n'))
|
||||
self._commit(doc)
|
||||
|
||||
|
||||
@ -350,6 +353,15 @@ class OPF(MetaInformation):
|
||||
comments = ''
|
||||
self._set_metadata_element('dc:Description', comments)
|
||||
|
||||
def get_uid(self):
|
||||
package = self.soup.find('package')
|
||||
if package.has_key('unique-identifier'):
|
||||
return package['unique-identifier']
|
||||
|
||||
def set_uid(self, uid):
|
||||
package = self.soup.find('package')
|
||||
package['unique-identifier'] = str(uid)
|
||||
|
||||
def get_category(self):
|
||||
category = self.soup.find('dc:type')
|
||||
if category:
|
||||
@ -500,7 +512,12 @@ class OPF(MetaInformation):
|
||||
self._set_metadata_element('dc:Subject', tags)
|
||||
|
||||
def write(self, stream):
|
||||
stream.write(self.soup.prettify('utf-8'))
|
||||
src = unicode(self.soup)
|
||||
src = re.sub(r'>\s*</item(ref)*>', ' />\n', src)
|
||||
src = re.sub(r'<manifest><', '<manifest>\n<', src)
|
||||
src = re.sub(r'<spine><', '<spine>\n<', src)
|
||||
src = re.sub(r'^<item', ' <item', src)
|
||||
stream.write(src.encode('utf-8')+'\n')
|
||||
|
||||
class OPFReader(OPF):
|
||||
|
||||
@ -543,6 +560,44 @@ class OPFCreator(OPF):
|
||||
self.isbn = mi.isbn
|
||||
if hasattr(mi, 'libprs_id'):
|
||||
self.libprs_id = mi.libprs_id
|
||||
if hasattr(mi, 'uid'):
|
||||
self.uid = mi.uid
|
||||
|
||||
def create_manifest(self, entries):
|
||||
doc = dom.parseString(self.soup.__str__('UTF-8').strip())
|
||||
package = doc.documentElement
|
||||
manifest = doc.createElement('manifest')
|
||||
package.appendChild(manifest)
|
||||
package.appendChild(doc.createTextNode('\n'))
|
||||
|
||||
self.href_map = {}
|
||||
|
||||
for href, media_type in entries:
|
||||
item = doc.createElement('item')
|
||||
item.setAttribute('href', href)
|
||||
item.setAttribute('media-type', media_type)
|
||||
self.href_map[href] = str(hash(href))
|
||||
item.setAttribute('id', self.href_map[href])
|
||||
manifest.appendChild(item)
|
||||
manifest.appendChild(doc.createTextNode('\n'))
|
||||
|
||||
self._commit(doc)
|
||||
|
||||
|
||||
def create_spine(self, entries):
|
||||
doc = dom.parseString(self.soup.__str__('UTF-8').strip())
|
||||
package = doc.documentElement
|
||||
spine = doc.createElement('spine')
|
||||
package.appendChild(spine)
|
||||
package.appendChild(doc.createTextNode('\n'))
|
||||
|
||||
for href in entries:
|
||||
itemref = doc.createElement('itemref')
|
||||
itemref.setAttribute('idref', self.href_map[href])
|
||||
spine.appendChild(itemref)
|
||||
spine.appendChild(doc.createTextNode('\n'))
|
||||
|
||||
self._commit(doc)
|
||||
|
||||
def main(args=sys.argv):
|
||||
parser = get_parser('opf')
|
||||
|
20
src/libprs500/ebooks/mobi/__init__.py
Normal file
20
src/libprs500/ebooks/mobi/__init__.py
Normal file
@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
## Copyright (C) 2008 Kovid Goyal kovid@kovidgoyal.net
|
||||
## This program is free software; you can redistribute it and/or modify
|
||||
## it under the terms of the GNU General Public License as published by
|
||||
## the Free Software Foundation; either version 2 of the License, or
|
||||
## (at your option) any later version.
|
||||
##
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
## GNU General Public License for more details.
|
||||
##
|
||||
## You should have received a copy of the GNU General Public License along
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
class MobiError(Exception):
    """Raised when a MOBI/PRC file cannot be parsed or converted."""
    pass
|
127
src/libprs500/ebooks/mobi/huffcdic.py
Normal file
127
src/libprs500/ebooks/mobi/huffcdic.py
Normal file
@ -0,0 +1,127 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
## Copyright (C) 2008 Kovid Goyal kovid@kovidgoyal.net
|
||||
## This program is free software; you can redistribute it and/or modify
|
||||
## it under the terms of the GNU General Public License as published by
|
||||
## the Free Software Foundation; either version 2 of the License, or
|
||||
## (at your option) any later version.
|
||||
##
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
## GNU General Public License for more details.
|
||||
##
|
||||
## You should have received a copy of the GNU General Public License along
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
'''
|
||||
Decompress MOBI files compressed with the Huff/cdic algorithm. Code thanks to darkninja
|
||||
and igorsk.
|
||||
'''
|
||||
|
||||
import struct
|
||||
|
||||
from libprs500.ebooks.mobi import MobiError
|
||||
|
||||
class BitReader(object):
    """Sequential reader of individual bits from a byte string.

    The data is padded with four NUL bytes so that ``peek`` may safely
    look at up to 32 bits past the logical end of the stream; ``nbits``
    tracks only the real payload length.
    """

    def __init__(self, data):
        # Pad with 32 zero bits so peek(32) never indexes out of range.
        self.data = data + "\x00\x00\x00\x00"
        self.pos = 0
        self.nbits = 8 * len(data)

    def peek(self, n):
        """Return the next *n* bits as an integer without consuming them."""
        acc = 0
        got = 0
        while got < n:
            byte = ord(self.data[(self.pos + got) >> 3])
            acc = (acc << 8) | byte
            # Jump to the byte boundary following the bits just read.
            got += 8 - ((self.pos + got) & 7)
        return (acc >> (got - n)) & ((1 << n) - 1)

    def eat(self, n):
        """Consume *n* bits; return True while still inside the payload."""
        self.pos += n
        return self.pos <= self.nbits

    def left(self):
        """Number of unread payload bits."""
        return self.nbits - self.pos
||||
|
||||
class HuffReader(object):
    """Decompressor for MOBI records compressed with the Huff/cdic scheme.

    ``huffs`` is the list of raw PDB records starting at the HUFF record:
    huffs[0] is the HUFF table, huffs[1:] are the CDIC dictionary records.
    ``extra_flags`` comes from the MOBI book header and describes the
    trailing bytes appended to each text record.
    """

    def __init__(self, huffs, extra_flags, codec='cp1252'):
        self.huffs, self.extra_flags, self.codec = huffs, extra_flags, codec

        # Both auxiliary records start with a magic identifier followed by
        # a fixed 32-bit header length (0x18 for HUFF, 0x10 for CDIC).
        if huffs[0][0:4] != 'HUFF' or huffs[0][4:8] != '\x00\x00\x00\x18':
            raise MobiError('Invalid HUFF header')

        if huffs[1][0:4] != 'CDIC' or huffs[1][4:8] != '\x00\x00\x00\x10':
            # NOTE(review): inconsistent with the MobiError raised above --
            # callers have to catch both exception types.
            raise ValueError('Invalid CDIC header')

        # Number of low bits of a decoded value that index into a CDIC record.
        self.entry_bits, = struct.unpack('>L', huffs[1][12:16])
        # Offsets (within the HUFF record) of the two decode tables.
        off1,off2 = struct.unpack('>LL', huffs[0][16:24])
        self.dict1 = struct.unpack('<256L', huffs[0][off1:off1+256*4])
        self.dict2 = struct.unpack('<64L', huffs[0][off2:off2+64*4])
        self.dicts = huffs[1:]
        # Accumulator for the decompressed byte string.
        self.r = ''

    def _unpack(self, bits, depth = 0):
        """Recursively decode ``bits`` (a BitReader), appending to ``self.r``.

        ``depth`` guards against corrupt dictionary entries that expand
        into themselves without ever producing literal text.
        """
        if depth > 32:
            raise MobiError('Corrupt file')

        while bits.left():
            dw = bits.peek(32)
            # dict1 is indexed by the top 8 bits; each entry packs the code
            # length (low 5 bits), a terminal flag (0x80) and a value in
            # the high 24 bits.
            v = self.dict1[dw >> 24]
            codelen = v & 0x1F
            assert codelen != 0
            code = dw >> (32 - codelen)
            r = (v >> 8)
            if not (v & 0x80):
                # Long code: lengthen it until it falls inside the range
                # recorded for that length in dict2, then derive the value.
                while code < self.dict2[(codelen-1)*2]:
                    codelen += 1
                    code = dw >> (32 - codelen)
                r = self.dict2[(codelen-1)*2+1]
                r -= code
            assert codelen != 0
            if not bits.eat(codelen):
                return
            # Split the decoded value into a CDIC record number and an
            # entry index within that record.
            dicno = r >> self.entry_bits
            off1 = 16 + (r - (dicno << self.entry_bits)) * 2
            dic = self.dicts[dicno]
            off2 = 16 + ord(dic[off1]) * 256 + ord(dic[off1+1])
            blen = ord(dic[off2]) * 256 + ord(dic[off2+1])
            slice = dic[off2+2:off2+2+(blen&0x7fff)]
            if blen & 0x8000:
                # High bit set: the entry is literal text.
                self.r += slice
            else:
                # Otherwise the entry is itself Huff-compressed.
                self._unpack(BitReader(slice), depth + 1)

    def unpack(self, data):
        """Decompress a single record; return the raw byte string."""
        self.r = ''
        self._unpack(BitReader(data))
        return self.r

    def sizeof_trailing_entries(self, data):
        """Total size of the extra bytes appended to a text record.

        Each set bit of ``extra_flags`` above bit 0 marks one trailing
        entry whose size is stored backwards, 7 bits per byte, at the
        end of the record.
        """

        def sizeof_trailing_entry(ptr, psize):
            # Decode a backward variable-width integer ending at ptr[psize-1].
            bitpos, result = 0, 0
            while True:
                v = ord(ptr[psize-1])
                result |= (v & 0x7F) << bitpos
                bitpos += 7
                psize -= 1
                if (v & 0x80) != 0 or (bitpos >= 28) or (psize == 0):
                    return result

        num = 0
        size = len(data)
        flags = self.extra_flags >> 1
        while flags:
            if flags & 1:
                num += sizeof_trailing_entry(data, size - num)
            flags >>= 1
        return num

    def decompress(self, sections):
        """Decompress all text ``sections``; return the joined unicode text."""
        r = ''
        for data in sections:
            trail_size = self.sizeof_trailing_entries(data)
            r += self.unpack(data[:len(data)-trail_size])
        if r.endswith('#'):
            # presumably a trailing end-of-text marker -- TODO confirm
            r = r[:-1]
        return r.decode(self.codec)
|
46
src/libprs500/ebooks/mobi/palmdoc.py
Normal file
46
src/libprs500/ebooks/mobi/palmdoc.py
Normal file
@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
## Copyright (C) 2008 Kovid Goyal kovid@kovidgoyal.net
|
||||
## This program is free software; you can redistribute it and/or modify
|
||||
## it under the terms of the GNU General Public License as published by
|
||||
## the Free Software Foundation; either version 2 of the License, or
|
||||
## (at your option) any later version.
|
||||
##
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
## GNU General Public License for more details.
|
||||
##
|
||||
## You should have received a copy of the GNU General Public License along
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
# Number of low bits of a back-reference token that encode the copy length.
COUNT_BITS = 3

def decompress_doc(data, codec='cp1252'):
    """Decompress one PalmDoc (LZ77-style) record and return unicode text.

    Token ranges:
      0x01-0x08           copy that many following bytes through literally
      0x09-0x7f           a single literal byte
      0x80-0xbf (2 bytes) back-reference: 11 bits of distance plus
                          COUNT_BITS bits of (length - 3)
      0xc0-0xff           a space followed by (byte ^ 0x80)
    """
    buffer = [ord(i) for i in data]
    res = []
    i = 0
    while i < len(buffer):
        c = buffer[i]
        i += 1
        if c >= 1 and c <= 8:
            # Literal run: the token value is the number of bytes to copy.
            res.extend(buffer[i:i+c])
            i += c
        elif c <= 0x7f:
            # Plain byte, stored as-is.
            res.append(c)
        elif c >= 0xc0:
            # Space-compressed pair: expands to ' ' + (c ^ 0x80).
            res.extend( (ord(' '), c^0x80) )
        else:
            # Two-byte back-reference into the output produced so far.
            c = (c << 8) + buffer[i]
            i += 1
            di = (c & 0x3fff) >> COUNT_BITS
            j = len(res)
            num = (c & ((1 << COUNT_BITS) - 1)) + 3

            # Copy byte by byte so overlapping references (di < num) work.
            for k in range( num ):
                res.append(res[j - di+k])

    return unicode(''.join([chr(i) for i in res]), codec)
|
||||
|
296
src/libprs500/ebooks/mobi/reader.py
Normal file
296
src/libprs500/ebooks/mobi/reader.py
Normal file
@ -0,0 +1,296 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
## Copyright (C) 2008 Kovid Goyal kovid@kovidgoyal.net
|
||||
## This program is free software; you can redistribute it and/or modify
|
||||
## it under the terms of the GNU General Public License as published by
|
||||
## the Free Software Foundation; either version 2 of the License, or
|
||||
## (at your option) any later version.
|
||||
##
|
||||
## This program is distributed in the hope that it will be useful,
|
||||
## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
## GNU General Public License for more details.
|
||||
##
|
||||
## You should have received a copy of the GNU General Public License along
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
'''
|
||||
Read data from .mobi files
|
||||
'''
|
||||
|
||||
import sys, struct, os, cStringIO, re
|
||||
|
||||
try:
|
||||
from PIL import Image as PILImage
|
||||
except ImportError:
|
||||
import Image as PILImage
|
||||
|
||||
from libprs500.ebooks.mobi import MobiError
|
||||
from libprs500.ebooks.mobi.huffcdic import HuffReader
|
||||
from libprs500.ebooks.mobi.palmdoc import decompress_doc
|
||||
from libprs500.ebooks.metadata import MetaInformation
|
||||
from libprs500.ebooks.metadata.opf import OPFCreator
|
||||
|
||||
|
||||
class EXTHHeader(object):
    """Parsed EXTH (extended metadata) block of a MOBI record 0.

    Populates ``self.mi`` (a MetaInformation instance) from the numbered
    metadata records and remembers cover/thumbnail record offsets.
    ``raw`` is the byte string starting at the EXTH identifier; ``codec``
    is the encoding used to decode text fields.
    """

    def __init__(self, raw, codec):
        self.doctype = raw[:4]
        self.length, self.num_items = struct.unpack('>LL', raw[4:12])
        raw = raw[12:]
        pos = 0

        self.mi = MetaInformation('Unknown', ['Unknown'])
        self.has_fake_cover = True

        for i in range(self.num_items):
            # Each EXTH record: 4-byte id, 4-byte total size, payload.
            id, size = struct.unpack('>LL', raw[pos:pos+8])
            content = raw[pos+8:pos+size]
            pos += size
            if id >= 100 and id < 200:
                # ids 100-199 are textual metadata (author, publisher, ...).
                self.process_metadata(id, content, codec)
            elif id == 203:
                # presumably non-zero means the cover was auto-generated -- TODO confirm
                self.has_fake_cover = bool(struct.unpack('>L', content)[0])
            elif id == 201:
                self.cover_offset, = struct.unpack('>L', content)
            elif id == 202:
                self.thumbnail_offset, = struct.unpack('>L', content)
        # The full book title follows the EXTH records after 3 padding
        # bytes and is NUL terminated.
        pos += 3
        stop = raw.find('\x00')
        # NOTE(review): find() searches from offset 0, not from ``pos`` --
        # verify the first NUL cannot occur before the title starts.
        if stop > -1:
            self.mi.title = raw[pos:stop].decode(codec, 'ignore')


    def process_metadata(self, id, content, codec):
        """Map one numbered EXTH metadata record onto ``self.mi``."""
        if id == 100:
            # Author field: multiple authors separated by ',' and/or '&'.
            aus = content.split(',')
            authors = []
            for a in aus:
                authors.extend(a.split('&'))
            self.mi.authors = [i.decode(codec, 'ignore') for i in authors]
        elif id == 101:
            self.mi.publisher = content.decode(codec, 'ignore')
        elif id == 103:
            self.mi.comments = content.decode(codec, 'ignore')
        elif id == 104:
            # ISBNs are stored with hyphens; normalize them away.
            self.mi.isbn = content.decode(codec, 'ignore').strip().replace('-', '')
        elif id == 105:
            self.mi.category = content.decode(codec, 'ignore')
|
||||
|
||||
class BookHeader(object):
    """Parsed MOBI/PalmDoc book header found in PDB record 0.

    ``raw`` is the full record 0 byte string; ``ident`` is the PDB type
    identifier ('BOOKMOBI' or 'TEXTREAD').  Exposes compression type,
    encryption type, codec, extra-data flags and (when present) the
    parsed EXTH metadata header as ``self.exth``.

    Raises MobiError for codepages other than 1252 and 65001.
    """

    def __init__(self, raw, ident):
        self.compression_type = raw[:2]
        self.records, self.records_size = struct.unpack('>HH', raw[8:12])
        self.encryption_type, = struct.unpack('>H', raw[12:14])
        self.doctype = raw[16:20]
        self.length, self.type, self.codepage, self.unique_id, self.version = \
            struct.unpack('>LLLLL', raw[20:40])

        # Plain text (TEXTREAD) books carry no codepage field; assume cp1252.
        if ident == 'TEXTREAD':
            self.codepage = 1252

        try:
            self.codec = {
                1252  : 'cp1252',
                65001 : 'utf-8',
            }[self.codepage]
        # BUG FIX: the original read ``except IndexError, KeyError:``
        # which is Python 2 for ``except IndexError as KeyError`` -- the
        # KeyError raised by the dict lookup was never caught, so unknown
        # codepages escaped as a bare KeyError instead of a MobiError.
        except KeyError:
            raise MobiError('Unknown codepage: %d'%self.codepage)

        if ident == 'TEXTREAD':
            self.extra_flags = 0
        else:
            self.extra_flags, = struct.unpack('>L', raw[0xF0:0xF4])

        # Huff/cdic compression stores the location of its helper records.
        if self.compression_type == 'DH':
            self.huff_offset, self.huff_number = struct.unpack('>LL', raw[0x70:0x78])

        self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
        self.exth = None
        # Bit 6 of the EXTH flag signals that an EXTH header follows the
        # book header (which is ``self.length`` bytes from offset 16).
        if self.exth_flag & 0x40:
            self.exth = EXTHHeader(raw[16+self.length:], self.codec)
            self.exth.mi.uid = self.unique_id
||||
|
||||
|
||||
class MobiReader(object):
    """Reader that converts a non-DRMed MOBI/PRC/TEXTREAD file to HTML + OPF.

    The constructor parses the PalmDB container into ``self.sections`` and
    the MOBI ``self.book_header``; ``extract_content`` decompresses the
    text, extracts images and writes the output files.
    """

    # One or more <mbp:pagebreak/> tags, in any of the spellings found
    # in the wild (open/close, self-closing, internal whitespace).
    PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE)

    def __init__(self, filename_or_stream):
        # Accept either an already-open (binary) stream or a path.
        if hasattr(filename_or_stream, 'read'):
            stream = filename_or_stream
            stream.seek(0)
        else:
            stream = open(filename_or_stream, 'rb')

        raw = stream.read()

        # PalmDB header: 32-byte NUL-padded name, record count at offset 76.
        self.header = raw[0:72]
        self.name = self.header[:32].replace('\x00', '')
        self.num_sections, = struct.unpack('>H', raw[76:78])

        self.ident = self.header[0x3C:0x3C+8].upper()
        if self.ident not in ['BOOKMOBI', 'TEXTREAD']:
            raise MobiError('Unknown book type: %s'%self.ident)

        # Record index: 8 bytes per record -- offset, flag byte, 24-bit value.
        self.sections = []
        self.section_headers = []
        for i in range(self.num_sections):
            offset, a1, a2, a3, a4 = struct.unpack('>LBBBB', raw[78+i*8:78+i*8+8])
            flags, val = a1, a2<<16 | a3<<8 | a4
            self.section_headers.append((offset, flags, val))

        def section(section_number):
            # A record extends to the start of the next one (or to EOF).
            if section_number == self.num_sections - 1:
                end_off = len(raw)
            else:
                end_off = self.section_headers[section_number + 1][0]
            off = self.section_headers[section_number][0]

            return raw[off:end_off]

        for i in range(self.num_sections):
            self.sections.append((section(i), self.section_headers[i]))

        # Record 0 holds the MOBI book header.
        self.book_header = BookHeader(self.sections[0][0], self.ident)


    def extract_content(self, output_dir=None):
        """Decompress the book and write HTML, images and an OPF file.

        ``output_dir`` defaults to the current working directory at call
        time.  BUG FIX: the original signature was
        ``output_dir=os.getcwdu()``, which evaluated the default once at
        import time and so froze the importing process's cwd forever.
        """
        if output_dir is None:
            output_dir = os.getcwdu()
        if self.book_header.encryption_type != 0:
            raise MobiError('Cannot extract content from DRM protected ebook')
        # Text lives in records 1..records; remember which records hold
        # text/helper data so they are not later mistaken for images.
        text_sections = [self.sections[i][0] for i in range(1, self.book_header.records+1)]
        processed_records = list(range(0, self.book_header.records+1))

        self.mobi_html = u''
        codec = self.book_header.codec

        if self.book_header.compression_type == 'DH':
            # Huff/cdic compression: gather the auxiliary HUFF/CDIC records.
            huffs = [self.sections[i][0] for i in
                     range(self.book_header.huff_offset,
                           self.book_header.huff_offset+self.book_header.huff_number)]
            processed_records += list(range(self.book_header.huff_offset,
                                            self.book_header.huff_offset+self.book_header.huff_number))
            huff = HuffReader(huffs, self.book_header.extra_flags, codec)
            self.mobi_html = huff.decompress(text_sections)

        elif self.book_header.compression_type == '\x00\x02':
            # PalmDoc (LZ77) compression.
            for section in text_sections:
                self.mobi_html += decompress_doc(section, codec)

        elif self.book_header.compression_type == '\x00\x01':
            # No compression: records are raw encoded text.
            t = [i.decode(codec) for i in text_sections]
            self.mobi_html = ''.join(t)

        else:
            raise MobiError('Unknown compression algorithm: %s'%repr(self.book_header.compression_type))

        self.add_anchors()
        self.extract_images(processed_records, output_dir)
        self.replace_page_breaks()

        # Declare the (re-)encoded charset in the output document.
        self.processed_html = re.compile('<head>', re.IGNORECASE).sub(
            '<head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n',
            self.processed_html)

        htmlfile = os.path.join(output_dir, self.name+'.html')
        open(htmlfile, 'wb').write(self.processed_html.encode('utf8'))

        # Write an OPF package only when the file carried EXTH metadata.
        if self.book_header.exth is not None:
            mi = self.book_header.exth.mi
            opf = OPFCreator(mi)
            if hasattr(self.book_header.exth, 'cover_offset'):
                opf.cover = 'images/%d.jpg'%(self.book_header.exth.cover_offset+1)
            manifest = [(os.path.basename(htmlfile), 'text/x-oeb1-document')]
            for i in self.image_names:
                manifest.append(('images/'+i, 'image/jpg'))

            opf.create_manifest(manifest)
            opf.create_spine([os.path.basename(htmlfile)])
            opf.write(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'))


    def replace_page_breaks(self):
        """Replace proprietary <mbp:pagebreak> tags with CSS page breaks."""
        self.processed_html = self.PAGE_BREAK_PAT.sub('<br style="page-break-after:always" />',
                                                      self.processed_html)

    def add_anchors(self):
        """Convert MOBI filepos links into standard HTML anchors.

        MOBI links point at byte positions in the text; insert an
        <a name="pos"> at each target and rewrite the links as hrefs.
        """
        positions = []
        link_pattern = re.compile(r'<a\s+filepos=(\d+)', re.IGNORECASE)
        for match in link_pattern.finditer(self.mobi_html):
            positions.append(int(match.group(1)))
        pos = 0
        self.processed_html = ''
        # NOTE(review): ``positions`` is used in document order of the
        # links, not sorted by target -- verify targets cannot precede
        # their links in pathological files.
        for end in positions:
            oend = end
            l = self.mobi_html.find('<', end)
            r = self.mobi_html.find('>', end)
            if r > -1 and r < l: # Move out of tag
                end = r+1
            self.processed_html += self.mobi_html[pos:end] + '<a name="%d" />'%oend
            pos = end

        self.processed_html += self.mobi_html[pos:]
        self.processed_html = link_pattern.sub(lambda match: '<a href="#%d"'%int(match.group(1)),
                                               self.processed_html)


    def extract_images(self, processed_records, output_dir):
        """Save every unprocessed record that decodes as an image.

        Images are written as JPEG into ``output_dir``/images, named by a
        1-based running index; <img recindex=...> references in the HTML
        are rewritten to point at them.
        """
        output_dir = os.path.abspath(os.path.join(output_dir, 'images'))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        image_index = 0
        self.image_names = []
        for i in range(self.num_sections):
            if i in processed_records:
                continue
            processed_records.append(i)
            data = self.sections[i][0]
            buf = cStringIO.StringIO(data)
            try:
                im = PILImage.open(buf)
            except IOError:
                # Not an image record (index, FLIS/FCIS, etc.) -- skip it.
                continue
            image_index += 1
            path = os.path.join(output_dir, '%05d.jpg'%image_index)
            self.image_names.append(os.path.basename(path))
            im.convert('RGB').save(open(path, 'wb'), format='JPEG')

        def fix_images(match):
            # Drop any existing src attribute, then point at the saved file.
            one = re.compile(r'src=["\']{0,1}[^\'"]+["\']{0,1}', re.IGNORECASE).sub('', match.group(1)).strip()
            return '<img'+one+' src="images/%s.jpg"'%match.group(2)

        self.processed_html = \
            re.compile(r'<img(.+?)recindex=[\'"]{0,1}(\d+)[\'"]{0,1}', re.IGNORECASE|re.DOTALL)\
            .sub(fix_images, self.processed_html)
|
||||
|
||||
|
||||
def option_parser():
    """Build the command line option parser for mobi2oeb."""
    from libprs500 import OptionParser
    opt_parser = OptionParser(usage='%prog [options] myebook.mobi')
    opt_parser.add_option(
        '-o', '--output-dir', default='.',
        help='Output directory. Defaults to current directory.')
    return opt_parser
|
||||
|
||||
|
||||
def main(args=sys.argv):
    """Command line entry point: convert one MOBI/PRC file to OEB.

    Expects exactly one positional argument (the input file); returns 0
    on success, 1 on usage error.
    """
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) != 2:
        # args[0] is the program name, so one input file means len == 2.
        parser.print_help()
        return 1

    mr = MobiReader(args[1])
    opts.output_dir = os.path.abspath(opts.output_dir)
    mr.extract_content(opts.output_dir)

    print 'OEB ebook created in', opts.output_dir

    return 0

if __name__ == '__main__':
    sys.exit(main())
|
@ -98,6 +98,7 @@ def setup_completion(fatal_errors):
|
||||
from libprs500.ebooks.lrf.parser import option_parser as lrf2lrsop
|
||||
from libprs500.gui2.lrf_renderer.main import option_parser as lrfviewerop
|
||||
from libprs500.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop
|
||||
from libprs500.ebooks.mobi.reader import option_parser as mobioeb
|
||||
|
||||
f = open_file('/etc/bash_completion.d/libprs500')
|
||||
|
||||
@ -120,6 +121,7 @@ def setup_completion(fatal_errors):
|
||||
f.write(opts_and_exts('epub-meta', metaop, ['epub']))
|
||||
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
|
||||
f.write(opts_and_exts('pdfrelow', pdfhtmlop, ['pdf']))
|
||||
f.write(opts_and_exts('mobi2oeb', mobioeb, ['mobi', 'prc']))
|
||||
f.write('''
|
||||
_prs500_ls()
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user