Re-integrate changes from home

This commit is contained in:
Marshall T. Vandegrift 2008-07-16 10:46:36 -04:00
commit 76b3759947
6 changed files with 1353 additions and 1039 deletions

View File

@ -1,5 +1,2 @@
import calibre.ebooks.maps.opf as opf
import calibre.ebooks.maps.html as html
OPF_MAP = opf.MAP
HTML_MAP = html.MAP
from calibre.ebooks.lit.maps.opf import MAP as OPF_MAP
from calibre.ebooks.lit.maps.html import MAP as HTML_MAP

File diff suppressed because it is too large Load Diff

View File

@ -1,28 +1,3 @@
ATTRS = {
0x0001 => "href",
0x0002 => "%never-used",
0x0003 => "%guid",
0x0004 => "%minimum_level",
0x0005 => "%attr5",
0x0006 => "id",
0x0007 => "href",
0x0008 => "media-type",
0x0009 => "fallback",
0x000A => "idref",
0x000B => "xmlns:dc",
0x000C => "xmlns:oebpackage",
0x000D => "role",
0x000E => "file-as",
0x000F => "event",
0x0010 => "scheme",
0x0011 => "title",
0x0012 => "type",
0x0013 => "unique-identifier",
0x0014 => "name",
0x0015 => "content",
0x0016 => "xml:lang",
}
TAGS = [
None,
"package",
@ -69,6 +44,31 @@ TAGS = [
None,
]
TAGS_ATTR = [{} for i in xrange(43)]
ATTRS = {
0x0001: "href",
0x0002: "%never-used",
0x0003: "%guid",
0x0004: "%minimum_level",
0x0005: "%attr5",
0x0006: "id",
0x0007: "href",
0x0008: "media-type",
0x0009: "fallback",
0x000A: "idref",
0x000B: "xmlns:dc",
0x000C: "xmlns:oebpackage",
0x000D: "role",
0x000E: "file-as",
0x000F: "event",
0x0010: "scheme",
0x0011: "title",
0x0012: "type",
0x0013: "unique-identifier",
0x0014: "name",
0x0015: "content",
0x0016: "xml:lang",
}
MAP = (TAGS, TAGS_ATTRS, ATTRS0)
TAGS_ATTRS = [{} for i in xrange(43)]
MAP = (TAGS, ATTRS, TAGS_ATTRS)

View File

@ -0,0 +1,343 @@
#!/usr/bin/env python
# -*- coding: iso-8859-1
"""A sample implementation of SHA-1 in pure Python.
Framework adapted from Dinu Gherman's MD5 implementation by
J. Hallén and L. Creighton. SHA-1 implementation based directly on
the text of the NIST standard FIPS PUB 180-1.
"""
__date__ = '2004-11-17'
__version__ = 0.91 # Modernised by J. Hallén and L. Creighton for Pypy
import struct, copy
# ======================================================================
# Bit-Manipulation helpers
#
# _long2bytes() was contributed by Barry Warsaw
# and is reused here with tiny modifications.
# ======================================================================
def _long2bytesBigEndian(n, blocksize=0):
    """Convert a long integer to a big-endian byte string.

    If optional blocksize is given and greater than zero, pad the front
    of the byte string with binary zeros so that the length is a multiple
    of blocksize.  With blocksize == 0 the result has no leading zero
    bytes (except that n == 0 yields a single zero byte).
    """
    # After much testing, this algorithm was deemed to be the fastest.
    s = ''
    # Hoist the attribute lookup out of the loop for speed.
    pack = struct.pack
    while n > 0:
        # Emit 32 bits at a time, most significant chunk ends up first.
        s = pack('>I', n & 0xffffffffL) + s
        n = n >> 32
    # Strip off leading zeros.
    for i in range(len(s)):
        if s[i] != '\000':
            break
    else:
        # Only happens when n == 0 (loop above produced no bytes or all
        # zeros): normalize to a single zero byte.
        s = '\000'
        i = 0
    s = s[i:]
    # Add back some pad bytes. This could be done more efficiently
    # w.r.t. the de-padding being done above, but sigh...
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * '\000' + s
    return s
def _bytelist2longBigEndian(list):
    """Transform a list of characters into a list of 32-bit longs.

    Every four consecutive bytes are combined big-endian into one long.
    NOTE(review): the parameter shadows the builtin ``list``; left as-is
    to keep the interface byte-identical.  Assumes len(list) is a
    multiple of 4 (callers pass whole 64-byte blocks) -- a trailing
    partial word would be silently dropped by the integer division.
    """
    imax = len(list)/4
    hl = [0L] * imax
    j = 0
    i = 0
    while i < imax:
        b0 = long(ord(list[j])) << 24
        b1 = long(ord(list[j+1])) << 16
        b2 = long(ord(list[j+2])) << 8
        b3 = long(ord(list[j+3]))
        hl[i] = b0 | b1 | b2 | b3
        i = i+1
        j = j+4
    return hl
def _rotateLeft(x, n):
"Rotate x (32 bit) left n bits circularly."
return (x << n) | (x >> (32-n))
# ======================================================================
# The SHA transformation functions
#
# ======================================================================
def f0_19(B, C, D):
    """'Choose' round function: each bit of B selects the corresponding
    bit of C (where B is set) or of D (where B is clear)."""
    # Classic Ch() form; bitwise-equivalent to (B & (C ^ D)) ^ D.
    return (B & C) | (~B & D)
def f20_39(B, C, D):
    """Parity round function: bitwise XOR of the three words."""
    parity = B ^ C
    return parity ^ D
def f40_59(B, C, D):
    """Majority round function: each output bit is the majority vote of
    the corresponding bits of B, C and D."""
    # Bitwise-equivalent to the ((B | C) & D) | (B & C) form.
    return (B & C) | (B & D) | (C & D)
def f60_79(B, C, D):
    """Parity round function (same operation as rounds 20-39)."""
    # XOR is associative and commutative, so the order is immaterial.
    return D ^ C ^ B
def f6_42(B, C, D):
    """Nonstandard round function used by this modified SHA-1 (see the
    module-level ``f`` table overrides); D is ignored.

    The arithmetic '+' can produce a value wider than 32 bits; the
    caller masks the overall round result.
    """
    total = B + C
    return total ^ C
# Round-function dispatch table: one entry per round t in 0..79.
# Standard SHA-1 uses f0_19 / f20_39 / f40_59 / f60_79 in four blocks
# of 20 rounds each; the per-index overrides below deviate from
# FIPS PUB 180-1.  NOTE(review): presumably intentional (this modified
# digest is what the LIT format requires) -- do not "correct" them.
f = [f0_19]*20 + [f20_39]*20 + [f40_59]*20 + [f60_79]*20
f[3] = f20_39
f[6] = f6_42
f[10] = f20_39
f[15] = f20_39
f[26] = f0_19
f[31] = f40_59
f[42] = f6_42
f[51] = f20_39
f[68] = f0_19

# Constants to be used: one additive constant per 20-round group
# (these match the standard SHA-1 K values).
K = [
    0x5A827999L, # ( 0 <= t <= 19)
    0x6ED9EBA1L, # (20 <= t <= 39)
    0x8F1BBCDCL, # (40 <= t <= 59)
    0xCA62C1D6L  # (60 <= t <= 79)
    ]
class sha:
    """A pure-Python SHA-1-style hash object (sha/md5 stdlib API).

    NOTE(review): the initial H0..H4 values set in init() and several
    round-function overrides in the module-level ``f`` table deviate
    from standard SHA-1 (FIPS PUB 180-1).  This appears deliberate --
    the modified digest is presumably what the LIT format requires --
    so do not "fix" them to the standard constants.
    """

    def __init__(self):
        "Initialisation."

        # Initial message length in bits(!).
        self.length = 0L
        # 64-bit running bit count kept as two 32-bit halves:
        # count[0] is the high word, count[1] the low word.
        self.count = [0, 0]

        # Initial empty message as a sequence of bytes (8 bit characters).
        self.input = []

        # Call a separate init function, that can be used repeatedly
        # to start from scratch on the same object.
        self.init()

    def init(self):
        "Initialize the message-digest and set all fields to zero."

        self.length = 0L
        self.input = []

        # Initial 160 bit message digest (5 times 32 bit).
        # NOTE(review): these are NOT the FIPS 180-1 values -- intentional.
        self.H0 = 0x32107654L
        self.H1 = 0x23016745L
        self.H2 = 0xC4E680A2L
        self.H3 = 0xDC679823L
        self.H4 = 0xD0857A34L

    def _transform(self, W):
        """Run the 80-round compression function over one 512-bit block.

        W is the list of sixteen 32-bit words of the block; it is
        extended in place to eighty words (the message schedule).
        """

        for t in range(16, 80):
            W.append(_rotateLeft(
                W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1) & 0xffffffffL)

        A = self.H0
        B = self.H1
        C = self.H2
        D = self.H3
        E = self.H4

        for t in xrange(0, 80):
            # t/20 is Python 2 integer division: selects K per 20-round group.
            TEMP = _rotateLeft(A, 5) + f[t](B, C, D) + E + W[t] + K[t/20]
            E = D
            D = C
            C = _rotateLeft(B, 30) & 0xffffffffL
            B = A
            A = TEMP & 0xffffffffL

        # Fold this block's result into the running state (mod 2**32).
        self.H0 = (self.H0 + A) & 0xffffffffL
        self.H1 = (self.H1 + B) & 0xffffffffL
        self.H2 = (self.H2 + C) & 0xffffffffL
        self.H3 = (self.H3 + D) & 0xffffffffL
        self.H4 = (self.H4 + E) & 0xffffffffL

    # Down from here all methods follow the Python Standard Library
    # API of the sha module.

    def update(self, inBuf):
        """Add to the current message.

        Update the sha object with the string arg. Repeated calls
        are equivalent to a single call with the concatenation of all
        the arguments, i.e. s.update(a); s.update(b) is equivalent
        to s.update(a+b).

        The hash is immediately calculated for all full blocks. The final
        calculation is made in digest(). It will calculate 1-2 blocks,
        depending on how much padding we have to add. This allows us to
        keep an intermediate value for the hash, so that we only need to
        make minimal recalculation if we call update() to add more data
        to the hashed string.
        """

        leninBuf = long(len(inBuf))

        # Compute number of bytes mod 64.
        index = (self.count[1] >> 3) & 0x3FL

        # Update number of bits.
        self.count[1] = self.count[1] + (leninBuf << 3)
        if self.count[1] < (leninBuf << 3):
            # Low 32-bit word wrapped; carry into the high word.
            self.count[0] = self.count[0] + 1
        self.count[0] = self.count[0] + (leninBuf >> 29)

        partLen = 64 - index

        if leninBuf >= partLen:
            # Complete the buffered partial block and transform it,
            # then transform every further full 64-byte block.
            self.input[index:] = list(inBuf[:partLen])
            self._transform(_bytelist2longBigEndian(self.input))
            i = partLen
            while i + 63 < leninBuf:
                self._transform(_bytelist2longBigEndian(list(inBuf[i:i+64])))
                i = i + 64
            else:
                # while/else: runs on normal loop exit; buffers the
                # remaining tail bytes for the next update()/digest().
                self.input = list(inBuf[i:leninBuf])
        else:
            # Not enough for a full block yet; just buffer the bytes.
            i = 0
            self.input = self.input + list(inBuf)

    def digest(self):
        """Terminate the message-digest computation and return digest.

        Return the digest of the strings passed to the update()
        method so far. This is a 20-byte string which may contain
        non-ASCII characters, including null bytes.
        """

        # Snapshot the full state so update() can continue afterwards:
        # digest() must not permanently alter the running hash.
        H0 = self.H0
        H1 = self.H1
        H2 = self.H2
        H3 = self.H3
        H4 = self.H4
        input = [] + self.input
        count = [] + self.count

        index = (self.count[1] >> 3) & 0x3fL

        if index < 56:
            padLen = 56 - index
        else:
            padLen = 120 - index

        # Pad with a single 0x80 byte followed by zeros, to 56 mod 64.
        padding = ['\200'] + ['\000'] * 63
        self.update(padding[:padLen])

        # Append length (before padding).
        bits = _bytelist2longBigEndian(self.input[:56]) + count

        self._transform(bits)

        # Store state in digest.
        digest = _long2bytesBigEndian(self.H0, 4) + \
                 _long2bytesBigEndian(self.H1, 4) + \
                 _long2bytesBigEndian(self.H2, 4) + \
                 _long2bytesBigEndian(self.H3, 4) + \
                 _long2bytesBigEndian(self.H4, 4)

        # Restore the snapshot taken above.
        self.H0 = H0
        self.H1 = H1
        self.H2 = H2
        self.H3 = H3
        self.H4 = H4
        self.input = input
        self.count = count

        return digest

    def hexdigest(self):
        """Terminate and return digest in HEX form.

        Like digest() except the digest is returned as a string of
        length 40, containing only hexadecimal digits. This may be
        used to exchange the value safely in email or other non-
        binary environments.
        """
        return ''.join(['%02x' % ord(c) for c in self.digest()])

    def copy(self):
        """Return a clone object.

        Return a copy ('clone') of the hash object. This can be used
        to efficiently compute the digests of strings that share
        a common initial substring.
        """

        return copy.deepcopy(self)
# ======================================================================
# Mimic Python top-level functions from standard library API
# for consistency with the md5 module of the standard library.
# ======================================================================
# These are mandatory variables in the module. They have constant values
# in the SHA standard.

digest_size = digestsize = 20  # SHA-1 digests are 160 bits = 20 bytes.
blocksize = 1

def new(arg=None):
    """Return a new sha crypto object.

    If arg is present, the method call update(arg) is made.
    Note: any falsy arg (e.g. the empty string) is skipped, which is
    equivalent since update('') is a no-op.
    """

    crypto = sha()
    if arg:
        crypto.update(arg)

    return crypto
if __name__ == '__main__':

    def main():
        """Hash stdin (or the file named in argv[1]) and print the hex
        digest in uppercase, grouped 8 hex digits at a time."""
        import sys
        file = None
        if len(sys.argv) > 2:
            print "usage: %s [FILE]" % sys.argv[0]
            return
        elif len(sys.argv) < 2:
            file = sys.stdin
        else:
            file = open(sys.argv[1], 'rb')

        context = new()
        # Stream the input in 16 KB chunks rather than reading it whole.
        data = file.read(16384)
        while data:
            context.update(data)
            data = file.read(16384)
        # NOTE(review): also closes sys.stdin in the no-argument case.
        file.close()

        digest = context.hexdigest().upper()
        # 40 hex digits, printed in five groups of eight.
        for i in xrange(0, 40, 8):
            print digest[i:i+8],
        print

    main()

View File

@ -5,6 +5,7 @@ Support for reading the metadata from a lit file.
'''
import sys, struct, cStringIO, os
import functools
from itertools import repeat
from calibre import relpath
@ -13,6 +14,31 @@ from calibre.ebooks.metadata.opf import OPFReader
from calibre.ebooks.lit import LitError
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
OPF_DECL = """"<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE package
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
"""
XHTML_DECL = """<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE html PUBLIC
"+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">
"""
class DirectoryEntry(object):
    """One entry of a LIT file directory: a named region located by
    section number, byte offset and size."""

    def __init__(self, name, section, offset, size):
        self.name = name
        self.section = section
        self.offset = offset
        self.size = size

    def __repr__(self):
        template = "<DirectoryEntry name='%s' section='%d' offset='%d' size='%d'>"
        return template % (self.name, self.section, self.offset, self.size)

    def __str__(self):
        # The human-readable form is the same as the debug form.
        return self.__repr__()
def u32(bytes):
    """Decode an unsigned 32-bit little-endian integer from the first
    four bytes of *bytes*.  Raises struct.error if fewer than four
    bytes are available."""
    return struct.unpack_from('<L', bytes, 0)[0]
@ -67,7 +93,7 @@ XML_ENTITIES = ['&amp;', '&apos;', '&lt;', '&gt;', '&quot;']
class UnBinary(object):
def __init__(self, bin, manifest, map=OPF_MAP):
self.manifest = manifest
self.attr_map, self.tag_map, self.tag_to_attr_map = map
self.tag_map, self.attr_map, self.tag_to_attr_map = map
self.opf = map is OPF_MAP
self.bin = bin
self.buf = cStringIO.StringIO()
@ -104,7 +130,7 @@ class UnBinary(object):
def binary_to_text(self, base=0, depth=0):
tag_name = current_map = None
dynamic_tag = errors = 0
in_censorship = False
in_censorship = is_goingdown = False
state = 'text'
index = base
flags = 0
@ -136,7 +162,7 @@ class UnBinary(object):
tag = oc
self.buf.write('<')
if not (flags & FLAG_CLOSING):
is_goingdown = 1
is_goingdown = True
if tag == 0x8000:
state = 'get custom length'
continue
@ -167,7 +193,7 @@ class UnBinary(object):
else:
self.buf.write('>')
index = self.binary_to_text(base=index, depth=depth+1)
is_goingdown = 0
is_goingdown = False
if not tag_name:
raise LitError('Tag ends before it begins.')
self.buf.write('</'+tag_name+'>')
@ -222,7 +248,7 @@ class UnBinary(object):
if not in_censorship:
self.buf.write(c)
count -= 1
elif count == 0:
if count == 0:
if not in_censorship:
self.buf.write('"')
in_censorship = False
@ -268,7 +294,7 @@ class UnBinary(object):
href += c
count -= 1
if count == 0:
doc, m, frag = href.partition('#')
doc, m, frag = href[1:].partition('#')
path = self.item_path(doc)
if m and frag:
path += m + frag
@ -297,100 +323,74 @@ class ManifestItem(object):
def __repr__(self):
return self.internal + u'->' + self.path
def preserve(function):
    """Method decorator that saves and restores self._stream's position.

    The wrapped method may seek/read freely; whatever happens (including
    an exception), the stream is returned to where it was on entry.
    """
    @functools.wraps(function)
    def wrapper(self, *args, **kwargs):
        saved_pos = self._stream.tell()
        try:
            return function(self, *args, **kwargs)
        finally:
            self._stream.seek(saved_pos)
    return wrapper
class LitFile(object):
PIECE_SIZE = 16
def magic():
@preserve
def fget(self):
val = None
opos = self._stream.tell()
try:
self._stream.seek(0)
val = self._stream.read(8)
finally:
self._stream.seek(opos)
return val
self._stream.seek(0)
return self._stream.read(8)
return property(fget=fget)
magic = magic()
def version():
def fget(self):
val = None
opos = self._stream.tell()
try:
self._stream.seek(8)
val = u32(self._stream.read(4))
finally:
self._stream.seek(opos)
return val
self._stream.seek(8)
return u32(self._stream.read(4))
return property(fget=fget)
version = version()
def hdr_len():
@preserve
def fget(self):
val = None
opos = self._stream.tell()
try:
self._stream.seek(12)
val = int32(self._stream.read(4))
finally:
self._stream.seek(opos)
return val
self._stream.seek(12)
return int32(self._stream.read(4))
return property(fget=fget)
hdr_len = hdr_len()
def num_pieces():
@preserve
def fget(self):
val = None
opos = self._stream.tell()
try:
self._stream.seek(16)
val = int32(self._stream.read(4))
finally:
self._stream.seek(opos)
return val
self._stream.seek(16)
return int32(self._stream.read(4))
return property(fget=fget)
num_pieces = num_pieces()
def sec_hdr_len():
@preserve
def fget(self):
val = None
opos = self._stream.tell()
try:
self._stream.seek(20)
val = int32(self._stream.read(4))
finally:
self._stream.seek(opos)
return val
self._stream.seek(20)
return int32(self._stream.read(4))
return property(fget=fget)
sec_hdr_len = sec_hdr_len()
def guid():
@preserve
def fget(self):
val = None
opos = self._stream.tell()
try:
self._stream.seek(24)
val = self._stream.read(16)
finally:
self._stream.seek(opos)
return val
self._stream.seek(24)
return self._stream.read(16)
return property(fget=fget)
guid = guid()
def header():
@preserve
def fget(self):
val = None
opos = self._stream.tell()
try:
size = self.hdr_len \
+ (self.num_pieces * self.PIECE_SIZE) \
+ self.sec_hdr_len
self._stream.seek(0)
val = self._stream.read(size)
finally:
self._stream.seek(opos)
return val
size = self.hdr_len \
+ (self.num_pieces * self.PIECE_SIZE) \
+ self.sec_hdr_len
self._stream.seek(0)
return self._stream.read(size)
return property(fget=fget)
header = header()
@ -403,69 +403,63 @@ class LitFile(object):
self.read_secondary_header()
self.read_header_pieces()
@preserve
def read_secondary_header(self):
opos = self._stream.tell()
try:
self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
bytes = self._stream.read(self.sec_hdr_len)
offset = int32(bytes[4:])
while offset < len(bytes):
blocktype = bytes[offset:offset+4]
blockver = u32(bytes[offset+4:])
if blocktype == 'CAOL':
if blockver != 2:
raise LitError(
'Unknown CAOL block format %d' % blockver)
self.creator_id = u32(bytes[offset+12:])
self.entry_chunklen = u32(bytes[offset+20:])
self.count_chunklen = u32(bytes[offset+24:])
self.entry_unknown = u32(bytes[offset+28:])
self.count_unknown = u32(bytes[offset+32:])
offset += 48
elif blocktype == 'ITSF':
if blockver != 4:
raise LitError(
'Unknown ITSF block format %d' % blockver)
if u32(bytes[offset+4+16:]):
raise LitError('This file has a 64bit content offset')
self.content_offset = u32(bytes[offset+16:])
self.timestamp = u32(bytes[offset+24:])
self.language_id = u32(bytes[offset+28:])
offset += 48
if not hasattr(self, 'content_offset'):
raise LitError('Could not figure out the content offset')
finally:
self._stream.seek(opos)
self._stream.seek(self.hdr_len + self.num_pieces*self.PIECE_SIZE)
bytes = self._stream.read(self.sec_hdr_len)
offset = int32(bytes[4:])
while offset < len(bytes):
blocktype = bytes[offset:offset+4]
blockver = u32(bytes[offset+4:])
if blocktype == 'CAOL':
if blockver != 2:
raise LitError(
'Unknown CAOL block format %d' % blockver)
self.creator_id = u32(bytes[offset+12:])
self.entry_chunklen = u32(bytes[offset+20:])
self.count_chunklen = u32(bytes[offset+24:])
self.entry_unknown = u32(bytes[offset+28:])
self.count_unknown = u32(bytes[offset+32:])
offset += 48
elif blocktype == 'ITSF':
if blockver != 4:
raise LitError(
'Unknown ITSF block format %d' % blockver)
if u32(bytes[offset+4+16:]):
raise LitError('This file has a 64bit content offset')
self.content_offset = u32(bytes[offset+16:])
self.timestamp = u32(bytes[offset+24:])
self.language_id = u32(bytes[offset+28:])
offset += 48
if not hasattr(self, 'content_offset'):
raise LitError('Could not figure out the content offset')
@preserve
def read_header_pieces(self):
opos = self._stream.tell()
try:
src = self.header[self.hdr_len:]
for i in range(self.num_pieces):
piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE]
if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
raise LitError('Piece %s has 64bit value' % repr(piece))
offset, size = u32(piece), int32(piece[8:])
self._stream.seek(offset)
piece = self._stream.read(size)
if i == 0:
continue # Dont need this piece
elif i == 1:
if u32(piece[8:]) != self.entry_chunklen or \
u32(piece[12:]) != self.entry_unknown:
raise LitError('Secondary header does not match piece')
self.read_directory(piece)
elif i == 2:
if u32(piece[8:]) != self.count_chunklen or \
u32(piece[12:]) != self.count_unknown:
raise LitError('Secondary header does not match piece')
continue # No data needed from this piece
elif i == 3:
self.piece3_guid = piece
elif i == 4:
self.piece4_guid = piece
finally:
self._stream.seek(opos)
src = self.header[self.hdr_len:]
for i in range(self.num_pieces):
piece = src[i*self.PIECE_SIZE:(i+1)*self.PIECE_SIZE]
if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
raise LitError('Piece %s has 64bit value' % repr(piece))
offset, size = u32(piece), int32(piece[8:])
self._stream.seek(offset)
piece = self._stream.read(size)
if i == 0:
continue # Dont need this piece
elif i == 1:
if u32(piece[8:]) != self.entry_chunklen or \
u32(piece[12:]) != self.entry_unknown:
raise LitError('Secondary header does not match piece')
self.read_directory(piece)
elif i == 2:
if u32(piece[8:]) != self.count_chunklen or \
u32(piece[12:]) != self.count_unknown:
raise LitError('Secondary header does not match piece')
continue # No data needed from this piece
elif i == 3:
self.piece3_guid = piece
elif i == 4:
self.piece4_guid = piece
def read_directory(self, piece):
self.entries = []
@ -522,107 +516,87 @@ class LitFile(object):
if not hasattr(self, 'manifest'):
raise LitError('Lit file does not have a valid manifest')
@preserve
def read_section_names(self, entry):
opos = self._stream.tell()
try:
self._stream.seek(self.content_offset + entry.offset)
raw = self._stream.read(entry.size)
if len(raw) < 4:
self._stream.seek(self.content_offset + entry.offset)
raw = self._stream.read(entry.size)
if len(raw) < 4:
raise LitError('Invalid Namelist section')
pos = 4
self.num_sections = u16(raw[2:pos])
self.sections = {}
for section in range(self.num_sections):
size = u16(raw[pos:pos+2])
pos += 2
size = size*2 + 2
if pos + size > len(raw):
raise LitError('Invalid Namelist section')
pos = 4
self.num_sections = u16(raw[2:pos])
self.sections = {}
for section in range(self.num_sections):
size = u16(raw[pos:pos+2])
pos += 2
size = size*2 + 2
if pos + size > len(raw):
raise LitError('Invalid Namelist section')
self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
pos += size
finally:
self._stream.seek(opos)
self.sections[section] = raw[pos:pos+size].decode('utf-16-le')
pos += size
@preserve
def read_manifest(self, entry):
opos = self._stream.tell()
try:
self.manifest = []
self._stream.seek(self.content_offset + entry.offset)
raw = self._stream.read(entry.size)
pos = 0
while pos < len(raw):
size = ord(raw[pos])
if size == 0: break
pos += 1
root = raw[pos:pos+size].decode('utf8')
pos += size
if pos >= len(raw):
raise LitError('Truncated manifest.')
for state in ['spine', 'not spine', 'css', 'images']:
num_files = int32(raw[pos:pos+4])
self.manifest = []
self._stream.seek(self.content_offset + entry.offset)
raw = self._stream.read(entry.size)
pos = 0
while pos < len(raw):
size = ord(raw[pos])
if size == 0: break
pos += 1
root = raw[pos:pos+size].decode('utf8')
pos += size
if pos >= len(raw):
raise LitError('Truncated manifest.')
for state in ['spine', 'not spine', 'css', 'images']:
num_files = int32(raw[pos:pos+4])
pos += 4
if num_files == 0: continue
i = 0
while i < num_files:
if pos+5 >= len(raw):
raise LitError('Truncated manifest.')
offset = u32(raw[pos:pos+4])
pos += 4
if num_files == 0: continue
i = 0
while i < num_files:
if pos+5 >= len(raw):
raise LitError('Truncated manifest.')
offset = u32(raw[pos:pos+4])
pos += 4
slen = ord(raw[pos])
pos += 1
internal = raw[pos:pos+slen].decode('utf8')
pos += slen
slen = ord(raw[pos])
pos += 1
internal = raw[pos:pos+slen].decode('utf8')
pos += slen
slen = ord(raw[pos])
pos += 1
original = raw[pos:pos+slen].decode('utf8')
pos += slen
slen = ord(raw[pos])
pos += 1
original = raw[pos:pos+slen].decode('utf8')
pos += slen
slen = ord(raw[pos])
pos += 1
mime_type = raw[pos:pos+slen].decode('utf8')
pos += slen + 1
slen = ord(raw[pos])
pos += 1
mime_type = raw[pos:pos+slen].decode('utf8')
pos += slen + 1
self.manifest.append(
ManifestItem(original, internal, mime_type,
offset, root, state))
i += 1
finally:
self._stream.seek(opos)
self.manifest.append(
ManifestItem(original, internal, mime_type,
offset, root, state))
i += 1
@preserve
def read_meta(self, entry):
opos = self._stream.tell()
try:
self._stream.seek(self.content_offset + entry.offset)
raw = self._stream.read(entry.size)
xml = \
'''\
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE package
PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"
"http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">
'''+\
unicode(UnBinary(raw, self.manifest))
self.meta = xml
finally:
self._stream.seek(opos)
self._stream.seek(self.content_offset + entry.offset)
raw = self._stream.read(entry.size)
xml = OPF_DECL + unicode(UnBinary(raw, self.manifest))
self.meta = xml
@preserve
def read_image(self, internal_name):
cover_entry = None
for entry in self.entries:
if internal_name in entry.name:
cover_entry = entry
break
opos = self._stream.tell()
try:
self._stream.seek(self.content_offset + cover_entry.offset)
return self._stream.read(cover_entry.size)
finally:
self._stream.seek(opos)
self._stream.seek(self.content_offset + cover_entry.offset)
return self._stream.read(cover_entry.size)
def get_metadata(stream):
try:

View File

@ -237,7 +237,7 @@ class OPF(MetaInformation):
def get_title(self):
title = self.soup.package.metadata.find('dc:title')
if title and title.string:
if title:
return self.ENTITY_PATTERN.sub(entity_to_unicode, title.string).strip()
return self.default_title.strip()