Converted overflow warnings into errors in LRFMetaFile

Fixed bug where setting a smaller thumbnail would not reduce lrf file size
Raised pylint score for lrf/*.py
This commit is contained in:
Kovid Goyal 2006-12-22 00:32:28 +00:00
parent 8bd8bfb39c
commit 9f4a2c9d72

View File

@ -14,12 +14,14 @@
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
""" """
This module presents an easy to use interface for getting and setting meta information in LRF files. This module presents an easy to use interface for getting and setting
Just create an L{LRFMetaFile} object and use its properties to get and set meta information. For example: meta information in LRF files.
Just create an L{LRFMetaFile} object and use its properties
>>> lrf = LRFMetaFile("mybook.lrf") to get and set meta information. For example:
>>> print lrf.title, lrf.author
>>> lrf.category = "History" >>> lrf = LRFMetaFile("mybook.lrf")
>>> print lrf.title, lrf.author
>>> lrf.category = "History"
""" """
import struct, array, zlib, StringIO import struct, array, zlib, StringIO
@ -33,311 +35,369 @@ DWORD = "<I" #: Unsigned integer little endian encoded in 4 bytes
QWORD = "<Q" #: Unsigned long long little endian encoded in 8 bytes QWORD = "<Q" #: Unsigned long long little endian encoded in 8 bytes
class versioned_field(field): class versioned_field(field):
def __init__(self, vfield, version, start=0, fmt=WORD): def __init__(self, vfield, version, start=0, fmt=WORD):
field.__init__(self, start=start, fmt=fmt) field.__init__(self, start=start, fmt=fmt)
self.vfield, self.version = vfield, version self.vfield, self.version = vfield, version
def enabled(self): def enabled(self):
return self.vfield > self.version return self.vfield > self.version
def __get__(self, obj, typ=None): def __get__(self, obj, typ=None):
if self.enabled(): return field.__get__(self, obj, typ=typ) if self.enabled():
else: return None return field.__get__(self, obj, typ=typ)
else:
return None
def __set__(self, obj, val): def __set__(self, obj, val):
if not self.enabled(): raise LRFException("Trying to set disabled field") if not self.enabled():
else: field.__set__(self, obj, val) raise LRFException("Trying to set disabled field")
else:
field.__set__(self, obj, val)
class LRFException(Exception): class LRFException(Exception):
pass pass
class fixed_stringfield(object): class fixed_stringfield(object):
""" A field storing a variable length string. """ """ A field storing a variable length string. """
def __init__(self, length=8, start=0): def __init__(self, length=8, start=0):
""" """
@param length: Size of this string @param length: Size of this string
@param start: The byte at which this field is stored in the buffer @param start: The byte at which this field is stored in the buffer
""" """
self._length = length self._length = length
self._start = start self._start = start
def __get__(self, obj, typ=None): def __get__(self, obj, typ=None):
length = str(self._length) length = str(self._length)
return obj.unpack(start=self._start, fmt="<"+length+"s")[0] return obj.unpack(start=self._start, fmt="<"+length+"s")[0]
def __set__(self, obj, val): def __set__(self, obj, val):
if val.__class__.__name__ != 'str': val = str(val) if val.__class__.__name__ != 'str': val = str(val)
if len(val) != self._length: raise LRFException("Trying to set fixed_stringfield with a string of incorrect length") if len(val) != self._length:
obj.pack(val, start=self._start, fmt="<"+str(len(val))+"s") raise LRFException("Trying to set fixed_stringfield with a " + \
"string of incorrect length")
obj.pack(val, start=self._start, fmt="<"+str(len(val))+"s")
def __repr__(self): def __repr__(self):
return "A string of length " + str(self._length) + " starting at byte " + str(self._start) return "A string of length " + str(self._length) + \
" starting at byte " + str(self._start)
class xml_field(object): class xml_field(object):
""" """
Descriptor that gets and sets XML based meta information from an LRF file. Descriptor that gets and sets XML based meta information from an LRF file.
Works for simple XML fields of the form <tagname>data</tagname> Works for simple XML fields of the form <tagname>data</tagname>
""" """
def __init__(self, tag_name): def __init__(self, tag_name):
""" @param tag_name: The XML tag whoose data we operate on """ """ @param tag_name: The XML tag whoose data we operate on """
self.tag_name = tag_name self.tag_name = tag_name
def __get__(self, obj, typ=None): def __get__(self, obj, typ=None):
document = dom.parseString(obj.info) document = dom.parseString(obj.info)
elem = document.getElementsByTagName(self.tag_name)[0] elem = document.getElementsByTagName(self.tag_name)[0]
elem.normalize() elem.normalize()
if not elem.hasChildNodes(): return "" if not elem.hasChildNodes():
return elem.firstChild.data.strip() return ""
return elem.firstChild.data.strip()
def __set__(self, obj, val): def __set__(self, obj, val):
document = dom.parseString(obj.info) document = dom.parseString(obj.info)
elem = document.getElementsByTagName(self.tag_name)[0] elem = document.getElementsByTagName(self.tag_name)[0]
elem.normalize() elem.normalize()
while elem.hasChildNodes(): elem.removeChild(elem.lastChild) while elem.hasChildNodes():
elem.appendChild(dom.Text()) elem.removeChild(elem.lastChild)
elem.firstChild.data = val elem.appendChild(dom.Text())
s = StringIO.StringIO() elem.firstChild.data = val
Print(document, s) s = StringIO.StringIO()
obj.info = s.getvalue() Print(document, s)
s.close() obj.info = s.getvalue()
s.close()
def __str__(self): def __str__(self):
return self.tag_name return self.tag_name
def __repr__(self): def __repr__(self):
return "XML Field: " + self.tag_name return "XML Field: " + self.tag_name
class LRFMetaFile(object): class LRFMetaFile(object):
""" Has properties to read and write all Meta information in a LRF file. """ """ Has properties to read and write all Meta information in a LRF file. """
LRF_HEADER = "L\0R\0F\0\0\0" #: The first 8 bytes of all valid LRF files LRF_HEADER = "L\0R\0F\0\0\0" #: The first 8 bytes of all valid LRF files
lrf_header = fixed_stringfield(length=8, start=0)
version = field(fmt=WORD, start=8)
xor_key = field(fmt=WORD, start=10)
root_object_id = field(fmt=DWORD, start=12)
number_of_objets = field(fmt=QWORD, start=16)
object_index_offset = field(fmt=QWORD, start=24)
binding = field(fmt=BYTE, start=36)
dpi = field(fmt=WORD, start=38)
width = field(fmt=WORD, start=42)
height = field(fmt=WORD, start=44)
color_depth = field(fmt=BYTE, start=46)
toc_object_id = field(fmt=DWORD, start=0x44)
toc_object_offset = field(fmt=DWORD, start=0x48)
compressed_info_size = field(fmt=WORD, start=0x4c)
thumbnail_type = versioned_field(version, 800, fmt=WORD, start=0x4e)
thumbnail_size = versioned_field(version, 800, fmt=DWORD, start=0x50)
uncompressed_info_size = versioned_field(compressed_info_size, 0, fmt=DWORD, start=0x54)
title = xml_field("Title")
author = xml_field("Author")
book_id = xml_field("BookID")
publisher = xml_field("Publisher")
label = xml_field("Label")
category = xml_field("Category")
language = xml_field("Language")
creator = xml_field("Creator")
creation_date = xml_field("CreationDate") #: Format is %Y-%m-%d
producer = xml_field("Producer")
page = xml_field("Page")
def safe(func):
    """
    Decorator that keeps the position in the underlying file stable
    across a call to C{func}.

    The wrapped callable must be invoked with, as its first positional
    argument, an object that exposes the open file as C{_file}.

    @param func: The callable to wrap
    @return: A wrapper that returns whatever C{func} returns
    """
    def restore_pos(*args, **kwargs):
        owner = args[0]
        saved = owner._file.tell()
        result = func(*args, **kwargs)
        # Jump to the end of the file to learn its current length. The saved
        # position is only restored when it still lies inside the file;
        # otherwise the position is left at the end.
        owner._file.seek(0, 2)
        end = owner._file.tell()
        if not end < saved:
            owner._file.seek(saved)
        return result
    return restore_pos
def safe_property(func): lrf_header = fixed_stringfield(length=8, start=0)
""" Decorator that ensures that read or writing a property leaves the position in the underlying file unchanged """ version = field(fmt=WORD, start=8)
def decorator(f): xor_key = field(fmt=WORD, start=10)
def restore_pos(*args, **kwargs): root_object_id = field(fmt=DWORD, start=12)
obj = args[0] number_of_objets = field(fmt=QWORD, start=16)
pos = obj._file.tell() object_index_offset = field(fmt=QWORD, start=24)
res = f(*args, **kwargs) binding = field(fmt=BYTE, start=36)
obj._file.seek(0,2) dpi = field(fmt=WORD, start=38)
if obj._file.tell() >= pos: obj._file.seek(pos) width = field(fmt=WORD, start=42)
return res height = field(fmt=WORD, start=44)
return restore_pos color_depth = field(fmt=BYTE, start=46)
locals_ = func() toc_object_id = field(fmt=DWORD, start=0x44)
if locals_.has_key("fget"): locals_["fget"] = decorator(locals_["fget"]) toc_object_offset = field(fmt=DWORD, start=0x48)
if locals_.has_key("fset"): locals_["fset"] = decorator(locals_["fset"]) compressed_info_size = field(fmt=WORD, start=0x4c)
return property(**locals_) thumbnail_type = versioned_field(version, 800, fmt=WORD, start=0x4e)
thumbnail_size = versioned_field(version, 800, fmt=DWORD, start=0x50)
@safe_property uncompressed_info_size = versioned_field(compressed_info_size, 0, \
def info(): fmt=DWORD, start=0x54)
doc=""" Document meta information in raw XML format """
def fget(self):
if self.compressed_info_size == 0:
raise LRFException("This document has no meta info")
size = self.compressed_info_size - 4
self._file.seek(self.info_start)
try:
stream = zlib.decompress(self._file.read(size))
if len(stream) != self.uncompressed_info_size:
raise LRFException("Decompression of document meta info yielded unexpected results")
return stream
except zlib.error, e:
raise LRFException("Unable to decompress document meta information")
def fset(self, info): title = xml_field("Title")
self.uncompressed_info_size = len(info) author = xml_field("Author")
stream = zlib.compress(info) book_id = xml_field("BookID")
self.compressed_info_size = len(stream) + 4 publisher = xml_field("Publisher")
self._file.seek(self.info_start) label = xml_field("Label")
self._file.write(stream) category = xml_field("Category")
self._file.flush()
return locals()
@safe_property
def thumbnail_pos():
doc=""" The position of the thumbnail in the LRF file """
def fget(self):
return self.info_start+ self.compressed_info_size-4
return locals()
@safe_property
def thumbnail():
doc=\
""" The thumbnail. Represented as a string. The string you would get from the file read function. """
def fget(self):
size = self.thumbnail_size
if size:
self._file.seek(self.thumbnail_pos)
return self._file.read(size)
def fset(self, data): language = xml_field("Language")
if self.version <= 800: raise LRFException("Cannot store thumbnails in LRF files of version <= 800") creator = xml_field("Creator")
orig_size = self.thumbnail_size creation_date = xml_field("CreationDate") #: Format is %Y-%m-%d
self._file.seek(self.toc_object_offset) producer = xml_field("Producer")
toc = self._file.read(self.object_index_offset - self.toc_object_offset) page = xml_field("Page")
self._file.seek(self.object_index_offset)
objects = self._file.read()
self.thumbnail_size = len(data)
self._file.seek(self.thumbnail_pos)
self._file.write(data)
orig_offset = self.toc_object_offset
self.toc_object_offset = self._file.tell()
self._file.write(toc)
self.object_index_offset = self._file.tell()
self._file.write(objects)
ttype = 0x14
if data[1:4] == "PNG": ttype = 0x12
if data[0:2] == "BM": ttype = 0x13
if data[0:4] == "JIFF": ttype = 0x11
self.thumbnail_type = ttype
self._file.flush()
self.update_object_offsets(self.toc_object_offset - orig_offset) # Needed as new thumbnail may have different size than old thumbnail
return locals()
def __init__(self, file):
""" @param file: A file object opened in the r+b mode """
file.seek(0,2)
self.size = file.tell()
self._file = file
if self.lrf_header != LRFMetaFile.LRF_HEADER:
raise LRFException(file.name + " has an invalid LRF header. Are you sure it is an LRF file?")
self.info_start = 0x58 if self.version > 800 else 0x53 #: Byte at which the compressed meta information starts
@safe def safe(func):
def update_object_offsets(self, delta): """
""" Run through the LRF Object index changing the offset by C{delta}. """ Decorator that ensures that function calls leave the pos
self._file.seek(self.object_index_offset) in the underlying file unchanged
while(True): """
try: self._file.read(4) def restore_pos(*args, **kwargs):
except EOFError: break obj = args[0]
pos = self._file.tell() pos = obj._file.tell()
try: offset = self.unpack(fmt=DWORD, start=pos)[0] + delta res = func(*args, **kwargs)
except struct.error: break obj._file.seek(0, 2)
self.pack(offset, fmt=DWORD, start=pos) if obj._file.tell() >= pos:
try: self._file.read(12) obj._file.seek(pos)
except EOFError: break return res
self._file.flush() return restore_pos
@safe
def unpack(self, fmt=DWORD, start=0):
"""
Return decoded data from file.
@param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>} def safe_property(func):
@param start: Position in file from which to decode """
""" Decorator that ensures that read or writing a property leaves
end = start + struct.calcsize(fmt) the position in the underlying file unchanged
self._file.seek(start) """
ret = struct.unpack(fmt, self._file.read(end-start)) def decorator(f):
return ret def restore_pos(*args, **kwargs):
obj = args[0]
pos = obj._file.tell()
res = f(*args, **kwargs)
obj._file.seek(0, 2)
if obj._file.tell() >= pos:
obj._file.seek(pos)
return res
return restore_pos
locals_ = func()
if locals_.has_key("fget"):
locals_["fget"] = decorator(locals_["fget"])
if locals_.has_key("fset"):
locals_["fset"] = decorator(locals_["fset"])
return property(**locals_)
@safe @safe_property
def pack(self, *args, **kwargs): def info():
""" doc = """ Document meta information in raw XML format """
Encode C{args} and write them to file. C{kwargs} must contain the keywords C{fmt} and C{start} def fget(self):
if self.compressed_info_size == 0:
raise LRFException("This document has no meta info")
size = self.compressed_info_size - 4
self._file.seek(self.info_start)
try:
stream = zlib.decompress(self._file.read(size))
if len(stream) != self.uncompressed_info_size:
raise LRFException("Decompression of document meta info\
yielded unexpected results")
return stream
except zlib.error, e:
raise LRFException("Unable to decompress document meta information")
def fset(self, info):
self.uncompressed_info_size = len(info)
stream = zlib.compress(info)
self.compressed_info_size = len(stream) + 4
self._file.seek(self.info_start)
self._file.write(stream)
self._file.flush()
return { "fget":fget, "fset":fset, "doc":doc }
@param args: The values to pack @safe_property
@param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>} def thumbnail_pos():
@param start: Position in file at which to write encoded data doc = """ The position of the thumbnail in the LRF file """
""" def fget(self):
encoded = struct.pack(kwargs["fmt"], *args) return self.info_start+ self.compressed_info_size-4
self._file.seek(kwargs["start"]) return { "fget":fget, "doc":doc }
self._file.write(encoded)
self._file.flush()
def __add__(self, tb): @safe_property
""" Return a LRFFile rather than a list as the sum """ def thumbnail():
return LRFFile(list.__add__(self, tb)) doc = \
"""
The thumbnail.
Represented as a string.
The string you would get from the file read function.
"""
def fget(self):
size = self.thumbnail_size
if size:
self._file.seek(self.thumbnail_pos)
return self._file.read(size)
def fset(self, data):
if self.version <= 800:
raise LRFException("Cannot store thumbnails in LRF files \
of version <= 800")
orig_size = self.thumbnail_size
self._file.seek(self.toc_object_offset)
toc = self._file.read(self.object_index_offset - self.toc_object_offset)
self._file.seek(self.object_index_offset)
objects = self._file.read()
self.thumbnail_size = len(data)
self._file.seek(self.thumbnail_pos)
self._file.write(data)
orig_offset = self.toc_object_offset
self.toc_object_offset = self._file.tell()
self._file.write(toc)
self.object_index_offset = self._file.tell()
self._file.write(objects)
self._file.flush()
self._file.truncate() # Incase old thumbnail was bigger than new
ttype = 0x14
if data[1:4] == "PNG":
ttype = 0x12
if data[0:2] == "BM":
ttype = 0x13
if data[0:4] == "JIFF":
ttype = 0x11
self.thumbnail_type = ttype
# Needed as new thumbnail may have different size than old thumbnail
self.update_object_offsets(self.toc_object_offset - orig_offset)
return { "fget":fget, "fset":fset, "doc":doc }
def __getslice__(self, start, end): def __init__(self, file):
""" Return a LRFFile rather than a list as the slice """ """ @param file: A file object opened in the r+b mode """
return LRFFile(list.__getslice__(self, start, end)) file.seek(0, 2)
self.size = file.tell()
self._file = file
if self.lrf_header != LRFMetaFile.LRF_HEADER:
raise LRFException(file.name + \
" has an invalid LRF header. Are you sure it is an LRF file?")
# Byte at which the compressed meta information starts
self.info_start = 0x58 if self.version > 800 else 0x53
def thumbail_extension(self): @safe
ext = "gif" def update_object_offsets(self, delta):
ttype = self.thumbnail_type """ Run through the LRF Object index changing the offset by C{delta}. """
if ttype == 0x11: ext = "jpeg" self._file.seek(self.object_index_offset)
elif ttype == 0x12: ext = "png" while(True):
elif ttype == 0x13: ext = "bm" try:
return ext self._file.read(4)
except EOFError:
break
pos = self._file.tell()
try:
offset = self.unpack(fmt=DWORD, start=pos)[0] + delta
except struct.error:
break
self.pack(offset, fmt=DWORD, start=pos)
try:
self._file.read(12)
except EOFError:
break
self._file.flush()
def main(): @safe
import sys, os.path def unpack(self, fmt=DWORD, start=0):
from optparse import OptionParser """
from libprs500 import __version__ as VERSION Return decoded data from file.
parser = OptionParser(usage="usage: %prog [options] mybook.lrf\n\nWARNING: Based on reverse engineering the LRF format. Making changes may render your LRF file unreadable. ", version=VERSION)
parser.add_option("-t", "--title", action="store", type="string", dest="title", help="Set the book title") @param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>}
parser.add_option("-a", "--author", action="store", type="string", dest="author", help="Set the author") @param start: Position in file from which to decode
parser.add_option("-c", "--category", action="store", type="string", dest="category", help="The category this book belongs to. E.g.: History") """
parser.add_option("--thumbnail", action="store", type="string", dest="thumbnail", help="Path to a graphic that will be set as this files' thumbnail") end = start + struct.calcsize(fmt)
parser.add_option("--get-thumbnail", action="store_true", dest="get_thumbnail", default=False, help="Extract thumbnail from LRF file") self._file.seek(start)
parser.add_option("-p", "--page", action="store", type="string", dest="page", help="Don't know what this is for") self._file.seek(start)
options, args = parser.parse_args() ret = struct.unpack(fmt, self._file.read(end-start))
if len(args) != 1: return ret
parser.print_help()
sys.exit(1) @safe
lrf = LRFMetaFile(open(args[0], "r+b")) def pack(self, *args, **kwargs):
if options.title: lrf.title = options.title """
if options.author: lrf.author = options.author Encode C{args} and write them to file.
if options.category: lrf.category = options.category C{kwargs} must contain the keywords C{fmt} and C{start}
if options.page: lrf.page = options.page
if options.thumbnail: @param args: The values to pack
f = open(options.thumbnail, "r") @param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>}
lrf.thumbnail = f.read() @param start: Position in file at which to write encoded data
f.close() """
encoded = struct.pack(kwargs["fmt"], *args)
self._file.seek(kwargs["start"])
self._file.write(encoded)
self._file.flush()
def thumbail_extension(self):
    """
    Return the file extension that corresponds to C{self.thumbnail_type}.

    @return: C{"jpeg"} for type 0x11, C{"png"} for 0x12, C{"bm"} for 0x13
             and C{"gif"} for any other value
    """
    ttype = self.thumbnail_type
    # Ordered (type code, extension) pairs; the first matching code wins.
    for code, ext in ((0x11, "jpeg"), (0x12, "png"), (0x13, "bm")):
        if ttype == code:
            return ext
    return "gif"
if options.get_thumbnail: def main():
t = lrf.thumbnail import sys, os.path
td = "None" from optparse import OptionParser
if t and len(t) > 0: from libprs500 import __version__ as VERSION
td = os.path.basename(args[0])+"_thumbnail_."+lrf.thumbail_extension() parser = OptionParser(usage="usage: %prog [options] mybook.lrf\n\
f = open(td, "w") \nWARNING: Based on reverse engineering the LRF format."+\
f.write(t) " Making changes may render your LRF file unreadable. ", \
f.close() version=VERSION)
parser.add_option("-t", "--title", action="store", type="string", \
fields = LRFMetaFile.__dict__.items() dest="title", help="Set the book title")
for f in fields: parser.add_option("-a", "--author", action="store", type="string", \
if "XML" in str(f): dest="author", help="Set the author")
print str(f[1]) + ":", lrf.__getattribute__(f[0]) parser.add_option("-c", "--category", action="store", type="string", \
if options.get_thumbnail: print "Thumbnail:", td dest="category", help="The category this book belongs"+\
" to. E.g.: History")
parser.add_option("--thumbnail", action="store", type="string", \
dest="thumbnail", help="Path to a graphic that will be"+\
" set as this files' thumbnail")
parser.add_option("--get-thumbnail", action="store_true", \
dest="get_thumbnail", default=False, \
help="Extract thumbnail from LRF file")
parser.add_option("-p", "--page", action="store", type="string", \
dest="page", help="Don't know what this is for")
options, args = parser.parse_args()
if len(args) != 1:
parser.print_help()
sys.exit(1)
lrf = LRFMetaFile(open(args[0], "r+b"))
if options.title:
lrf.title = options.title
if options.author:
lrf.author = options.author
if options.category:
lrf.category = options.category
if options.page:
lrf.page = options.page
if options.thumbnail:
f = open(options.thumbnail, "r")
lrf.thumbnail = f.read()
f.close()
if options.get_thumbnail:
t = lrf.thumbnail
td = "None"
if t and len(t) > 0:
td = os.path.basename(args[0])+"_thumbnail_."+lrf.thumbail_extension()
f = open(td, "w")
f.write(t)
f.close()
fields = LRFMetaFile.__dict__.items()
for f in fields:
if "XML" in str(f):
print str(f[1]) + ":", lrf.__getattribute__(f[0])
if options.get_thumbnail:
print "Thumbnail:", td
# This turns overflow warnings into errors, so that an out-of-range value
# raises an exception instead of only emitting a warning.
# NOTE(review): the filter matches the whole DeprecationWarning category, so
# every DeprecationWarning raised after this point becomes an exception, not
# only overflow ones -- confirm that this breadth is intended.
import warnings
warnings.simplefilter("error", DeprecationWarning)