Converted overflow warnings into errors in LRFMetaFile

Fixed bug where setting a smaller thumbnail would not reduce lrf file size
Raised pylint score for lrf/*.py
This commit is contained in:
Kovid Goyal 2006-12-22 00:32:28 +00:00
parent 8bd8bfb39c
commit 9f4a2c9d72

View File

@ -14,12 +14,14 @@
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
""" """
This module presents an easy to use interface for getting and setting meta information in LRF files. This module presents an easy to use interface for getting and setting
Just create an L{LRFMetaFile} object and use its properties to get and set meta information. For example: meta information in LRF files.
Just create an L{LRFMetaFile} object and use its properties
to get and set meta information. For example:
>>> lrf = LRFMetaFile("mybook.lrf") >>> lrf = LRFMetaFile("mybook.lrf")
>>> print lrf.title, lrf.author >>> print lrf.title, lrf.author
>>> lrf.category = "History" >>> lrf.category = "History"
""" """
import struct, array, zlib, StringIO import struct, array, zlib, StringIO
@ -33,311 +35,369 @@ DWORD = "<I" #: Unsigned integer little endian encoded in 4 bytes
QWORD = "<Q" #: Unsigned long long little endian encoded in 8 bytes QWORD = "<Q" #: Unsigned long long little endian encoded in 8 bytes
class versioned_field(field):
    """
    A L{field} that is only usable while a controlling value exceeds a
    threshold.

    Reading a disabled field yields C{None}; writing to a disabled field
    raises L{LRFException}.
    """

    def __init__(self, vfield, version, start=0, fmt=WORD):
        """
        @param vfield:  The controlling value. Either a plain comparable value
                        or a descriptor (e.g. another L{field}) whose value
                        for the owning instance is compared with C{version}
        @param version: Threshold. The field is enabled only when the
                        controlling value is strictly greater than this
        @param start:   Forwarded unchanged to C{field.__init__}
        @param fmt:     Forwarded unchanged to C{field.__init__}
        """
        field.__init__(self, start=start, fmt=fmt)
        self.vfield, self.version = vfield, version

    def enabled(self, obj=None):
        """
        Return True if this field is enabled.

        @param obj: The instance the field is being accessed on. When given
                    and the controlling value is a descriptor, the descriptor
                    is resolved against C{obj} first. Comparing the descriptor
                    object itself (the previous behaviour, still used when
                    C{obj} is omitted) never looks at the data in the file.
        """
        controlling = self.vfield
        if obj is not None and hasattr(controlling, "__get__"):
            controlling = controlling.__get__(obj, type(obj))
        return controlling > self.version

    def __get__(self, obj, typ=None):
        """ Return the stored value, or C{None} if the field is disabled. """
        if self.enabled(obj):
            return field.__get__(self, obj, typ=typ)
        return None

    def __set__(self, obj, val):
        """
        Store C{val}.
        @raise LRFException: If the field is disabled
        """
        if not self.enabled(obj):
            raise LRFException("Trying to set disabled field")
        field.__set__(self, obj, val)
class LRFException(Exception):
    """ Raised for all errors detected while reading or updating LRF files. """
    pass
class fixed_stringfield(object):
    """ A field storing a fixed length string. """

    def __init__(self, length=8, start=0):
        """
        @param length: Size of this string
        @param start: The byte at which this field is stored in the buffer
        """
        self._length = length
        self._start = start

    def __get__(self, obj, typ=None):
        """ Read C{length} bytes at C{start} via C{obj.unpack}. """
        length = str(self._length)
        return obj.unpack(start=self._start, fmt="<" + length + "s")[0]

    def __set__(self, obj, val):
        """
        Write C{val} at C{start} via C{obj.pack}. Values that are not
        strings are converted with C{str} first.
        @raise LRFException: If the string is not exactly C{length} long
        """
        if not isinstance(val, str):
            val = str(val)
        if len(val) != self._length:
            raise LRFException("Trying to set fixed_stringfield with a "
                               "string of incorrect length")
        obj.pack(val, start=self._start, fmt="<" + str(len(val)) + "s")

    def __repr__(self):
        return "A string of length " + str(self._length) + \
               " starting at byte " + str(self._start)
class xml_field(object):
    """
    Descriptor that gets and sets XML based meta information from an LRF file.
    Works for simple XML fields of the form <tagname>data</tagname>

    The XML document is read from, and written back to, the C{info}
    attribute of the owning object.
    """

    def __init__(self, tag_name):
        """ @param tag_name: The XML tag whose data we operate on """
        self.tag_name = tag_name

    def __get__(self, obj, typ=None):
        """
        Return the stripped text of the first C{tag_name} element, or the
        empty string if that element has no children.
        @raise IndexError: If the document has no C{tag_name} element
        """
        document = dom.parseString(obj.info)
        elem = document.getElementsByTagName(self.tag_name)[0]
        elem.normalize()
        if not elem.hasChildNodes():
            return ""
        return elem.firstChild.data.strip()

    def __set__(self, obj, val):
        """
        Replace the contents of the first C{tag_name} element with C{val}
        and store the re-serialised document in C{obj.info}.
        @raise IndexError: If the document has no C{tag_name} element
        """
        document = dom.parseString(obj.info)
        elem = document.getElementsByTagName(self.tag_name)[0]
        elem.normalize()
        while elem.hasChildNodes():
            elem.removeChild(elem.lastChild)
        elem.appendChild(dom.Text())
        elem.firstChild.data = val
        s = StringIO.StringIO()
        try:
            Print(document, s)
            obj.info = s.getvalue()
        finally:
            # Release the buffer even if serialisation or the write fails
            s.close()

    def __str__(self):
        return self.tag_name

    def __repr__(self):
        return "XML Field: " + self.tag_name
class LRFMetaFile(object):
    """ Has properties to read and write all Meta information in a LRF file. """

    LRF_HEADER = "L\0R\0F\0\0\0"  #: The first 8 bytes of all valid LRF files

    # Fixed-position header fields; C{start} is the byte offset in the file.
    lrf_header = fixed_stringfield(length=8, start=0)
    version = field(fmt=WORD, start=8)
    xor_key = field(fmt=WORD, start=10)
    root_object_id = field(fmt=DWORD, start=12)
    number_of_objets = field(fmt=QWORD, start=16)
    object_index_offset = field(fmt=QWORD, start=24)
    binding = field(fmt=BYTE, start=36)
    dpi = field(fmt=WORD, start=38)
    width = field(fmt=WORD, start=42)
    height = field(fmt=WORD, start=44)
    color_depth = field(fmt=BYTE, start=46)
    toc_object_id = field(fmt=DWORD, start=0x44)
    toc_object_offset = field(fmt=DWORD, start=0x48)
    compressed_info_size = field(fmt=WORD, start=0x4c)
    thumbnail_type = versioned_field(version, 800, fmt=WORD, start=0x4e)
    thumbnail_size = versioned_field(version, 800, fmt=DWORD, start=0x50)
    uncompressed_info_size = versioned_field(compressed_info_size, 0,
                                             fmt=DWORD, start=0x54)

    # Fields stored inside the compressed XML meta information block.
    title = xml_field("Title")
    author = xml_field("Author")
    book_id = xml_field("BookID")
    publisher = xml_field("Publisher")
    label = xml_field("Label")
    category = xml_field("Category")
    language = xml_field("Language")
    creator = xml_field("Creator")
    creation_date = xml_field("CreationDate")  #: Format is %Y-%m-%d
    producer = xml_field("Producer")
    page = xml_field("Page")

    def safe(func):
        """
        Decorator that ensures that function calls leave the pos
        in the underlying file unchanged
        """
        def restore_pos(*args, **kwargs):
            obj = args[0]
            pos = obj._file.tell()
            res = func(*args, **kwargs)
            obj._restore_position(pos)
            return res
        restore_pos.__name__ = func.__name__
        restore_pos.__doc__ = func.__doc__
        return restore_pos

    def safe_property(func):
        """
        Decorator that ensures that read or writing a property leaves
        the position in the underlying file unchanged.

        C{func} must return a dictionary of C{property} keyword arguments
        (C{fget}, C{fset}, C{doc}); the getter and setter are wrapped.
        """
        def decorator(f):
            def restore_pos(*args, **kwargs):
                obj = args[0]
                pos = obj._file.tell()
                res = f(*args, **kwargs)
                obj._restore_position(pos)
                return res
            return restore_pos
        locals_ = func()
        for accessor in ("fget", "fset"):
            if accessor in locals_:
                locals_[accessor] = decorator(locals_[accessor])
        return property(**locals_)

    def _restore_position(self, pos):
        """
        Seek back to C{pos}. If the file is now shorter than C{pos} the
        position is left at the end of the file instead.
        """
        self._file.seek(0, 2)
        if self._file.tell() >= pos:
            self._file.seek(pos)

    @safe_property
    def info():
        doc = """ Document meta information in raw XML format """

        def fget(self):
            if self.compressed_info_size == 0:
                raise LRFException("This document has no meta info")
            # NOTE(review): the stored size is 4 larger than the zlib stream;
            # presumably it also counts the 4 byte uncompressed size -- confirm
            size = self.compressed_info_size - 4
            self._file.seek(self.info_start)
            try:
                stream = zlib.decompress(self._file.read(size))
            except zlib.error:
                raise LRFException("Unable to decompress document meta information")
            if len(stream) != self.uncompressed_info_size:
                raise LRFException("Decompression of document meta info "
                                   "yielded unexpected results")
            return stream

        def fset(self, info):
            # NOTE(review): the new stream is written in place at info_start;
            # nothing here moves the data that follows it if the compressed
            # size changes -- confirm this is handled elsewhere
            self.uncompressed_info_size = len(info)
            stream = zlib.compress(info)
            self.compressed_info_size = len(stream) + 4
            self._file.seek(self.info_start)
            self._file.write(stream)
            self._file.flush()

        return {"fget": fget, "fset": fset, "doc": doc}

    @safe_property
    def thumbnail_pos():
        doc = """ The position of the thumbnail in the LRF file """

        def fget(self):
            return self.info_start + self.compressed_info_size - 4

        return {"fget": fget, "doc": doc}

    @safe_property
    def thumbnail():
        doc = \
        """
        The thumbnail.
        Represented as a string.
        The string you would get from the file read function.
        C{None} if the file has no thumbnail.
        """

        def fget(self):
            size = self.thumbnail_size
            if size:
                self._file.seek(self.thumbnail_pos)
                return self._file.read(size)
            return None

        def fset(self, data):
            if self.version <= 800:
                raise LRFException("Cannot store thumbnails in LRF files "
                                   "of version <= 800")
            # Read everything from the TOC object to the end of the file so
            # that it can be written back directly after the new thumbnail.
            # NOTE(review): data between the old thumbnail and the TOC object
            # is not preserved by this code -- confirm the layout assumption
            self._file.seek(self.toc_object_offset)
            toc = self._file.read(self.object_index_offset - self.toc_object_offset)
            self._file.seek(self.object_index_offset)
            objects = self._file.read()
            self.thumbnail_size = len(data)
            self._file.seek(self.thumbnail_pos)
            self._file.write(data)
            orig_offset = self.toc_object_offset
            self.toc_object_offset = self._file.tell()
            self._file.write(toc)
            self.object_index_offset = self._file.tell()
            self._file.write(objects)
            self._file.flush()
            self._file.truncate()  # In case old thumbnail was bigger than new
            ttype = 0x14
            if data[1:4] == "PNG":
                ttype = 0x12
            if data[0:2] == "BM":
                ttype = 0x13
            # JPEG data starts with the SOI marker FF D8; JFIF files carry
            # the identifier "JFIF" at offset 6. (The old test for "JIFF" at
            # offset 0 could never match.)
            if data[0:2] == "\xff\xd8" or data[6:10] == "JFIF":
                ttype = 0x11
            self.thumbnail_type = ttype
            # Needed as new thumbnail may have different size than old thumbnail
            self.update_object_offsets(self.toc_object_offset - orig_offset)

        return {"fget": fget, "fset": fset, "doc": doc}

    def __init__(self, file):
        """
        @param file: A file object opened in the r+b mode
        @raise LRFException: If the file does not start with L{LRF_HEADER}
        """
        file.seek(0, 2)
        self.size = file.tell()
        self._file = file
        if self.lrf_header != LRFMetaFile.LRF_HEADER:
            # File-like objects are not required to have a name
            name = getattr(file, "name", repr(file))
            raise LRFException(name +
                " has an invalid LRF header. Are you sure it is an LRF file?")
        # Byte at which the compressed meta information starts
        self.info_start = 0x58 if self.version > 800 else 0x53

    @safe
    def update_object_offsets(self, delta):
        """ Run through the LRF Object index changing the offset by C{delta}. """
        self._file.seek(self.object_index_offset)
        while True:
            # Each pass skips 4 bytes, rewrites the following DWORD and then
            # skips 12 more bytes. The loop ends when fewer than 4 bytes
            # remain, which makes unpack raise struct.error.
            try:
                self._file.read(4)
                pos = self._file.tell()
                offset = self.unpack(fmt=DWORD, start=pos)[0] + delta
            except (EOFError, struct.error):
                break
            self.pack(offset, fmt=DWORD, start=pos)
            try:
                self._file.read(12)
            except EOFError:
                break
        self._file.flush()

    @safe
    def unpack(self, fmt=DWORD, start=0):
        """
        Return decoded data from file.

        @param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>}
        @param start: Position in file from which to decode
        @return: The tuple produced by C{struct.unpack}
        """
        self._file.seek(start)
        return struct.unpack(fmt, self._file.read(struct.calcsize(fmt)))

    @safe
    def pack(self, *args, **kwargs):
        """
        Encode C{args} and write them to file.
        C{kwargs} must contain the keywords C{fmt} and C{start}

        @param args: The values to pack
        @param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>}
        @param start: Position in file at which to write encoded data
        """
        encoded = struct.pack(kwargs["fmt"], *args)
        self._file.seek(kwargs["start"])
        self._file.write(encoded)
        self._file.flush()

    def thumbail_extension(self):
        """
        Return a file extension (without the dot) matching
        L{thumbnail_type}. Unrecognised types map to C{"gif"}.
        """
        ext = "gif"
        ttype = self.thumbnail_type
        if ttype == 0x11:
            ext = "jpeg"
        elif ttype == 0x12:
            ext = "png"
        elif ttype == 0x13:
            ext = "bmp"
        return ext

    # Correctly spelled alias; the misspelled name is kept for existing callers.
    thumbnail_extension = thumbail_extension
def main():
    """
    Command line interface: optionally update the title, author, category,
    page and thumbnail of an LRF file, optionally extract its thumbnail,
    then print all XML meta information fields.
    """
    import sys, os.path
    from optparse import OptionParser
    from libprs500 import __version__ as VERSION
    parser = OptionParser(
        usage="usage: %prog [options] mybook.lrf\n\n"
              "WARNING: Based on reverse engineering the LRF format."
              " Making changes may render your LRF file unreadable. ",
        version=VERSION)
    parser.add_option("-t", "--title", action="store", type="string",
                      dest="title", help="Set the book title")
    parser.add_option("-a", "--author", action="store", type="string",
                      dest="author", help="Set the author")
    parser.add_option("-c", "--category", action="store", type="string",
                      dest="category",
                      help="The category this book belongs to. E.g.: History")
    parser.add_option("--thumbnail", action="store", type="string",
                      dest="thumbnail",
                      help="Path to a graphic that will be set as this files' thumbnail")
    parser.add_option("--get-thumbnail", action="store_true",
                      dest="get_thumbnail", default=False,
                      help="Extract thumbnail from LRF file")
    parser.add_option("-p", "--page", action="store", type="string",
                      dest="page", help="Don't know what this is for")
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.print_help()
        sys.exit(1)

    lrf_file = open(args[0], "r+b")
    try:
        lrf = LRFMetaFile(lrf_file)
        if options.title:
            lrf.title = options.title
        if options.author:
            lrf.author = options.author
        if options.category:
            lrf.category = options.category
        if options.page:
            lrf.page = options.page
        if options.thumbnail:
            # Image data is binary; text mode would corrupt it on platforms
            # that translate line endings
            f = open(options.thumbnail, "rb")
            try:
                lrf.thumbnail = f.read()
            finally:
                f.close()

        td = "None"
        if options.get_thumbnail:
            t = lrf.thumbnail
            if t and len(t) > 0:
                td = os.path.basename(args[0]) + "_thumbnail_." + \
                     lrf.thumbail_extension()
                f = open(td, "wb")
                try:
                    f.write(t)
                finally:
                    f.close()

        # Print every XML backed field, sorted by attribute name so that
        # the output order is stable
        for name, descriptor in sorted(LRFMetaFile.__dict__.items()):
            if isinstance(descriptor, xml_field):
                print("%s: %s" % (descriptor, getattr(lrf, name)))
        if options.get_thumbnail:
            print("Thumbnail: %s" % td)
    finally:
        lrf_file.close()
# This turns overflow warnings into errors
# Every DeprecationWarning raised after this point becomes an exception.
# The filter is process wide, not limited to this module.
# NOTE(review): presumably aimed at the DeprecationWarning that struct.pack
# issues for out of range values -- confirm.
import warnings
warnings.simplefilter("error", DeprecationWarning)