Converted overflow warnings into errors in LRFMetaFile

Fixed bug where setting a smaller thumbnail would not reduce lrf file size
Raised pylint score for lrf/*.py
This commit is contained in:
Kovid Goyal 2006-12-22 00:32:28 +00:00
parent 8bd8bfb39c
commit 9f4a2c9d72

View File

@ -14,12 +14,14 @@
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
This module presents an easy to use interface for getting and setting meta information in LRF files.
Just create an L{LRFMetaFile} object and use its properties to get and set meta information. For example:
>>> lrf = LRFMetaFile("mybook.lrf")
>>> print lrf.title, lrf.author
>>> lrf.category = "History"
This module presents an easy to use interface for getting and setting
meta information in LRF files.
Just create an L{LRFMetaFile} object and use its properties
to get and set meta information. For example:
>>> lrf = LRFMetaFile("mybook.lrf")
>>> print lrf.title, lrf.author
>>> lrf.category = "History"
"""
import struct, array, zlib, StringIO
@ -33,311 +35,369 @@ DWORD = "<I" #: Unsigned integer little endian encoded in 4 bytes
QWORD = "<Q" #: Unsigned long long little endian encoded in 8 bytes
class versioned_field(field):
    """
    A L{field} that is only present when another header value of the same
    object is greater than a threshold.

    Reading a disabled field yields C{None}; writing to it raises
    L{LRFException}.
    """

    def __init__(self, vfield, version, start=0, fmt=WORD):
        """
        @param vfield: The descriptor whose value on the owning object
                       decides whether this field is present
        @param version: This field is enabled only when the value of
                        C{vfield} is greater than C{version}
        @param start: Passed through to L{field}
        @param fmt: Passed through to L{field}
        """
        field.__init__(self, start=start, fmt=fmt)
        self.vfield, self.version = vfield, version

    def enabled(self, obj=None):
        """
        Return True if this field is present for C{obj}.

        C{vfield} is a descriptor handed over from the class body, so it must
        be resolved against C{obj} to obtain the value stored in the file;
        comparing the descriptor object itself with C{version} never looks
        at the file.

        @param obj: The object owning this field. When omitted there is
                    nothing to resolve C{vfield} against and the field is
                    reported as enabled.
                    NOTE(review): that fallback is meant to mirror what the
                    old descriptor-vs-int comparison evaluated to - confirm.
        """
        if obj is None:
            return True
        return self.vfield.__get__(obj, typ=type(obj)) > self.version

    def __get__(self, obj, typ=None):
        """ Return the stored value, or C{None} if the field is disabled. """
        if self.enabled(obj):
            return field.__get__(self, obj, typ=typ)
        return None

    def __set__(self, obj, val):
        """ Store C{val}; raises L{LRFException} if the field is disabled. """
        if not self.enabled(obj):
            raise LRFException("Trying to set disabled field")
        field.__set__(self, obj, val)
class LRFException(Exception):
    """ Error raised by the LRF meta information code in this module. """
class fixed_stringfield(object):
    """
    A field storing a fixed length string.

    The owning object must provide C{unpack(start=, fmt=)} and
    C{pack(value, start=, fmt=)}; this descriptor only builds the struct
    format and delegates to them.
    """

    def __init__(self, length=8, start=0):
        """
        @param length: Size of this string
        @param start: The byte at which this field is stored in the buffer
        """
        self._length = length
        self._start = start

    def _fmt(self):
        """ Struct format for a little endian string of C{length} bytes. """
        return "<" + str(self._length) + "s"

    def __get__(self, obj, typ=None):
        """ Return the C{length} bytes stored at C{start}. """
        return obj.unpack(start=self._start, fmt=self._fmt())[0]

    def __set__(self, obj, val):
        """
        Store C{val}, converting it with C{str} first if it is not a string.

        Raises L{LRFException} if the resulting string does not have
        exactly C{length} characters.
        """
        if not isinstance(val, str):
            val = str(val)
        if len(val) != self._length:
            raise LRFException("Trying to set fixed_stringfield with a "
                               "string of incorrect length")
        # The length check above guarantees len(val) == self._length.
        obj.pack(val, start=self._start, fmt=self._fmt())

    def __repr__(self):
        return ("A string of length " + str(self._length) +
                " starting at byte " + str(self._start))
class xml_field(object):
    """
    Descriptor exposing one XML element of the meta information of an
    LRF file. The XML is read from, and written back to, the C{info}
    attribute of the owning object.
    Works for simple XML fields of the form <tagname>data</tagname>
    """

    def __init__(self, tag_name):
        """ @param tag_name: The XML tag whose data we operate on """
        self.tag_name = tag_name

    def __get__(self, obj, typ=None):
        """ Return the stripped text of the first matching element ("" if empty). """
        tree = dom.parseString(obj.info)
        node = tree.getElementsByTagName(self.tag_name)[0]
        node.normalize()
        if node.hasChildNodes():
            return node.firstChild.data.strip()
        return ""

    def __set__(self, obj, val):
        """ Replace the content of the first matching element with C{val}. """
        tree = dom.parseString(obj.info)
        node = tree.getElementsByTagName(self.tag_name)[0]
        node.normalize()
        # Drop every existing child so the element ends up holding exactly
        # one text node.
        while node.hasChildNodes():
            node.removeChild(node.lastChild)
        node.appendChild(dom.Text())
        node.firstChild.data = val
        # Serialize the whole document again and hand it back to the owner.
        out = StringIO.StringIO()
        Print(tree, out)
        obj.info = out.getvalue()
        out.close()

    def __str__(self):
        return self.tag_name

    def __repr__(self):
        return "XML Field: " + self.tag_name
class LRFMetaFile(object):
""" Has properties to read and write all Meta information in a LRF file. """
LRF_HEADER = "L\0R\0F\0\0\0" #: The first 8 bytes of all valid LRF files
lrf_header = fixed_stringfield(length=8, start=0)
version = field(fmt=WORD, start=8)
xor_key = field(fmt=WORD, start=10)
root_object_id = field(fmt=DWORD, start=12)
number_of_objets = field(fmt=QWORD, start=16)
object_index_offset = field(fmt=QWORD, start=24)
binding = field(fmt=BYTE, start=36)
dpi = field(fmt=WORD, start=38)
width = field(fmt=WORD, start=42)
height = field(fmt=WORD, start=44)
color_depth = field(fmt=BYTE, start=46)
toc_object_id = field(fmt=DWORD, start=0x44)
toc_object_offset = field(fmt=DWORD, start=0x48)
compressed_info_size = field(fmt=WORD, start=0x4c)
thumbnail_type = versioned_field(version, 800, fmt=WORD, start=0x4e)
thumbnail_size = versioned_field(version, 800, fmt=DWORD, start=0x50)
uncompressed_info_size = versioned_field(compressed_info_size, 0, fmt=DWORD, start=0x54)
title = xml_field("Title")
author = xml_field("Author")
book_id = xml_field("BookID")
publisher = xml_field("Publisher")
label = xml_field("Label")
category = xml_field("Category")
language = xml_field("Language")
creator = xml_field("Creator")
creation_date = xml_field("CreationDate") #: Format is %Y-%m-%d
producer = xml_field("Producer")
page = xml_field("Page")
def safe(func):
""" Decorator that ensures that function calls leave the pos in the underlying file unchanged """
def restore_pos(*args, **kwargs):
obj = args[0]
pos = obj._file.tell()
res = func(*args, **kwargs)
obj._file.seek(0,2)
if obj._file.tell() >= pos: obj._file.seek(pos)
return res
return restore_pos
""" Has properties to read and write all Meta information in a LRF file. """
LRF_HEADER = "L\0R\0F\0\0\0" #: The first 8 bytes of all valid LRF files
def safe_property(func):
""" Decorator that ensures that read or writing a property leaves the position in the underlying file unchanged """
def decorator(f):
def restore_pos(*args, **kwargs):
obj = args[0]
pos = obj._file.tell()
res = f(*args, **kwargs)
obj._file.seek(0,2)
if obj._file.tell() >= pos: obj._file.seek(pos)
return res
return restore_pos
locals_ = func()
if locals_.has_key("fget"): locals_["fget"] = decorator(locals_["fget"])
if locals_.has_key("fset"): locals_["fset"] = decorator(locals_["fset"])
return property(**locals_)
@safe_property
def info():
doc=""" Document meta information in raw XML format """
def fget(self):
if self.compressed_info_size == 0:
raise LRFException("This document has no meta info")
size = self.compressed_info_size - 4
self._file.seek(self.info_start)
try:
stream = zlib.decompress(self._file.read(size))
if len(stream) != self.uncompressed_info_size:
raise LRFException("Decompression of document meta info yielded unexpected results")
return stream
except zlib.error, e:
raise LRFException("Unable to decompress document meta information")
lrf_header = fixed_stringfield(length=8, start=0)
version = field(fmt=WORD, start=8)
xor_key = field(fmt=WORD, start=10)
root_object_id = field(fmt=DWORD, start=12)
number_of_objets = field(fmt=QWORD, start=16)
object_index_offset = field(fmt=QWORD, start=24)
binding = field(fmt=BYTE, start=36)
dpi = field(fmt=WORD, start=38)
width = field(fmt=WORD, start=42)
height = field(fmt=WORD, start=44)
color_depth = field(fmt=BYTE, start=46)
toc_object_id = field(fmt=DWORD, start=0x44)
toc_object_offset = field(fmt=DWORD, start=0x48)
compressed_info_size = field(fmt=WORD, start=0x4c)
thumbnail_type = versioned_field(version, 800, fmt=WORD, start=0x4e)
thumbnail_size = versioned_field(version, 800, fmt=DWORD, start=0x50)
uncompressed_info_size = versioned_field(compressed_info_size, 0, \
fmt=DWORD, start=0x54)
def fset(self, info):
self.uncompressed_info_size = len(info)
stream = zlib.compress(info)
self.compressed_info_size = len(stream) + 4
self._file.seek(self.info_start)
self._file.write(stream)
self._file.flush()
return locals()
@safe_property
def thumbnail_pos():
doc=""" The position of the thumbnail in the LRF file """
def fget(self):
return self.info_start+ self.compressed_info_size-4
return locals()
@safe_property
def thumbnail():
doc=\
""" The thumbnail. Represented as a string. The string you would get from the file read function. """
def fget(self):
size = self.thumbnail_size
if size:
self._file.seek(self.thumbnail_pos)
return self._file.read(size)
title = xml_field("Title")
author = xml_field("Author")
book_id = xml_field("BookID")
publisher = xml_field("Publisher")
label = xml_field("Label")
category = xml_field("Category")
def fset(self, data):
if self.version <= 800: raise LRFException("Cannot store thumbnails in LRF files of version <= 800")
orig_size = self.thumbnail_size
self._file.seek(self.toc_object_offset)
toc = self._file.read(self.object_index_offset - self.toc_object_offset)
self._file.seek(self.object_index_offset)
objects = self._file.read()
self.thumbnail_size = len(data)
self._file.seek(self.thumbnail_pos)
self._file.write(data)
orig_offset = self.toc_object_offset
self.toc_object_offset = self._file.tell()
self._file.write(toc)
self.object_index_offset = self._file.tell()
self._file.write(objects)
ttype = 0x14
if data[1:4] == "PNG": ttype = 0x12
if data[0:2] == "BM": ttype = 0x13
if data[0:4] == "JIFF": ttype = 0x11
self.thumbnail_type = ttype
self._file.flush()
self.update_object_offsets(self.toc_object_offset - orig_offset) # Needed as new thumbnail may have different size than old thumbnail
return locals()
def __init__(self, file):
""" @param file: A file object opened in the r+b mode """
file.seek(0,2)
self.size = file.tell()
self._file = file
if self.lrf_header != LRFMetaFile.LRF_HEADER:
raise LRFException(file.name + " has an invalid LRF header. Are you sure it is an LRF file?")
self.info_start = 0x58 if self.version > 800 else 0x53 #: Byte at which the compressed meta information starts
language = xml_field("Language")
creator = xml_field("Creator")
creation_date = xml_field("CreationDate") #: Format is %Y-%m-%d
producer = xml_field("Producer")
page = xml_field("Page")
@safe
def update_object_offsets(self, delta):
""" Run through the LRF Object index changing the offset by C{delta}. """
self._file.seek(self.object_index_offset)
while(True):
try: self._file.read(4)
except EOFError: break
pos = self._file.tell()
try: offset = self.unpack(fmt=DWORD, start=pos)[0] + delta
except struct.error: break
self.pack(offset, fmt=DWORD, start=pos)
try: self._file.read(12)
except EOFError: break
self._file.flush()
@safe
def unpack(self, fmt=DWORD, start=0):
"""
Return decoded data from file.
def safe(func):
    """
    Decorator for methods that must not move the position of the
    underlying file.

    The position of C{self._file} is recorded before the call and restored
    afterwards, unless the file has meanwhile become shorter than that
    position, in which case it is left at the end of the file.
    """
    def restore_pos(*args, **kwargs):
        owner = args[0]
        saved = owner._file.tell()
        result = func(*args, **kwargs)
        # Seek to the end first to learn the current size of the file.
        owner._file.seek(0, 2)
        if saved <= owner._file.tell():
            owner._file.seek(saved)
        return result
    return restore_pos
@param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>}
@param start: Position in file from which to decode
"""
end = start + struct.calcsize(fmt)
self._file.seek(start)
ret = struct.unpack(fmt, self._file.read(end-start))
return ret
def safe_property(func):
    """
    Decorator that ensures that reading or writing a property leaves
    the position in the underlying file unchanged.

    @param func: Called once without arguments; must return a dictionary
                 of keyword arguments for C{property} (C{fget}, C{fset},
                 C{doc}, ...). Its C{fget} and C{fset} entries, when
                 present, are wrapped so that the position of C{self._file}
                 is restored after each access.
    @return: The resulting C{property} object
    """
    def decorator(f):
        def restore_pos(*args, **kwargs):
            obj = args[0]
            pos = obj._file.tell()
            res = f(*args, **kwargs)
            # Only seek back if the file is still at least that long.
            obj._file.seek(0, 2)
            if obj._file.tell() >= pos:
                obj._file.seek(pos)
            return res
        return restore_pos
    locals_ = func()
    for accessor in ("fget", "fset"):
        # "in" instead of the deprecated dict.has_key()
        if accessor in locals_:
            locals_[accessor] = decorator(locals_[accessor])
    return property(**locals_)
@safe
def pack(self, *args, **kwargs):
"""
Encode C{args} and write them to file. C{kwargs} must contain the keywords C{fmt} and C{start}
@safe_property
def info():
    doc = """ Document meta information in raw XML format """

    def fget(self):
        """
        Return the decompressed meta information.

        Raises L{LRFException} if the file has no meta information, if it
        cannot be decompressed, or if the decompressed size does not match
        C{uncompressed_info_size}.
        """
        if self.compressed_info_size == 0:
            raise LRFException("This document has no meta info")
        # compressed_info_size is 4 larger than the zlib stream itself
        # (fset stores len(stream) + 4).
        size = self.compressed_info_size - 4
        self._file.seek(self.info_start)
        try:
            stream = zlib.decompress(self._file.read(size))
        except zlib.error:
            raise LRFException("Unable to decompress document meta information")
        if len(stream) != self.uncompressed_info_size:
            # Adjacent literals instead of a backslash inside the string,
            # which corrupted the message at the line break.
            raise LRFException("Decompression of document meta info "
                               "yielded unexpected results")
        return stream

    def fset(self, info):
        """ Compress C{info} and write it at C{info_start}. """
        self.uncompressed_info_size = len(info)
        stream = zlib.compress(info)
        self.compressed_info_size = len(stream) + 4
        # NOTE(review): the stream is written in place and nothing after it
        # is moved, so a stream of a different length than the previous one
        # appears to clobber or misalign the data that follows - confirm.
        self._file.seek(self.info_start)
        self._file.write(stream)
        self._file.flush()

    return { "fget":fget, "fset":fset, "doc":doc }
@param args: The values to pack
@param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>}
@param start: Position in file at which to write encoded data
"""
encoded = struct.pack(kwargs["fmt"], *args)
self._file.seek(kwargs["start"])
self._file.write(encoded)
self._file.flush()
@safe_property
def thumbnail_pos():
    doc = """ The position of the thumbnail in the LRF file """

    def fget(self):
        # info_start plus compressed_info_size, less 4
        end_of_info = self.info_start + self.compressed_info_size
        return end_of_info - 4

    return dict(fget=fget, doc=doc)
def __add__(self, tb):
""" Return a LRFFile rather than a list as the sum """
return LRFFile(list.__add__(self, tb))
@safe_property
def thumbnail():
    doc = \
    """
    The thumbnail.
    Represented as a string.
    The string you would get from the file read function.
    """

    def fget(self):
        """ Return the thumbnail data, or C{None} if C{thumbnail_size} is not set. """
        size = self.thumbnail_size
        if size:
            self._file.seek(self.thumbnail_pos)
            return self._file.read(size)
        return None

    def fset(self, data):
        """
        Replace the thumbnail with C{data} and rewrite the parts of the
        file that are stored after it.

        Raises L{LRFException} for LRF files of version <= 800.
        """
        if self.version <= 800:
            raise LRFException("Cannot store thumbnails in LRF files "
                               "of version <= 800")
        # Keep the TOC object and everything from the object index up to
        # the end of the file in memory; both are written back after the
        # new thumbnail.
        self._file.seek(self.toc_object_offset)
        toc = self._file.read(self.object_index_offset - self.toc_object_offset)
        self._file.seek(self.object_index_offset)
        objects = self._file.read()
        self.thumbnail_size = len(data)
        self._file.seek(self.thumbnail_pos)
        self._file.write(data)
        orig_offset = self.toc_object_offset
        self.toc_object_offset = self._file.tell()
        self._file.write(toc)
        self.object_index_offset = self._file.tell()
        self._file.write(objects)
        self._file.flush()
        self._file.truncate() # In case old thumbnail was bigger than new
        # Thumbnail type codes, see thumbail_extension():
        # 0x11 jpeg, 0x12 png, 0x13 bmp, anything else is treated as gif.
        ttype = 0x14
        if data[1:4] == "PNG":
            ttype = 0x12
        elif data[0:2] == "BM":
            ttype = 0x13
        elif data[0:2] == "\xff\xd8":
            # JPEG streams start with the SOI marker FF D8. The previous
            # test for "JIFF" at offset 0 could never match a JPEG.
            ttype = 0x11
        self.thumbnail_type = ttype
        # Needed as new thumbnail may have different size than old thumbnail
        self.update_object_offsets(self.toc_object_offset - orig_offset)

    return { "fget":fget, "fset":fset, "doc":doc }
def __getslice__(self, start, end):
""" Return a LRFFile rather than a list as the slice """
return LRFFile(list.__getslice__(self, start, end))
def __init__(self, file):
    """
    Check the LRF header of C{file} and record where its meta
    information starts.

    @param file: A file object opened in the r+b mode
    """
    file.seek(0, 2)
    self.size = file.tell()
    self._file = file
    header_ok = self.lrf_header == LRFMetaFile.LRF_HEADER
    if not header_ok:
        message = file.name + \
            " has an invalid LRF header. Are you sure it is an LRF file?"
        raise LRFException(message)
    # Byte at which the compressed meta information starts
    if self.version > 800:
        self.info_start = 0x58
    else:
        self.info_start = 0x53
def thumbail_extension(self):
ext = "gif"
ttype = self.thumbnail_type
if ttype == 0x11: ext = "jpeg"
elif ttype == 0x12: ext = "png"
elif ttype == 0x13: ext = "bm"
return ext
@safe
def update_object_offsets(self, delta):
    """
    Run through the LRF Object index changing the offset by C{delta}.

    Starting at C{object_index_offset}, each step skips 4 bytes, rewrites
    the following DWORD with C{delta} added and then moves on 12 bytes
    from the start of that DWORD. The walk ends when no complete DWORD
    can be decoded any more.
    """
    self._file.seek(self.object_index_offset)
    while True:
        try:
            self._file.read(4)
        except EOFError:
            break
        field_pos = self._file.tell()
        try:
            current = self.unpack(fmt=DWORD, start=field_pos)[0]
        except struct.error:
            break
        # unpack and pack are wrapped with @safe, so the file is back at
        # field_pos after each of them.
        self.pack(current + delta, fmt=DWORD, start=field_pos)
        try:
            self._file.read(12)
        except EOFError:
            break
    self._file.flush()
def main():
import sys, os.path
from optparse import OptionParser
from libprs500 import __version__ as VERSION
parser = OptionParser(usage="usage: %prog [options] mybook.lrf\n\nWARNING: Based on reverse engineering the LRF format. Making changes may render your LRF file unreadable. ", version=VERSION)
parser.add_option("-t", "--title", action="store", type="string", dest="title", help="Set the book title")
parser.add_option("-a", "--author", action="store", type="string", dest="author", help="Set the author")
parser.add_option("-c", "--category", action="store", type="string", dest="category", help="The category this book belongs to. E.g.: History")
parser.add_option("--thumbnail", action="store", type="string", dest="thumbnail", help="Path to a graphic that will be set as this files' thumbnail")
parser.add_option("--get-thumbnail", action="store_true", dest="get_thumbnail", default=False, help="Extract thumbnail from LRF file")
parser.add_option("-p", "--page", action="store", type="string", dest="page", help="Don't know what this is for")
options, args = parser.parse_args()
if len(args) != 1:
parser.print_help()
sys.exit(1)
lrf = LRFMetaFile(open(args[0], "r+b"))
if options.title: lrf.title = options.title
if options.author: lrf.author = options.author
if options.category: lrf.category = options.category
if options.page: lrf.page = options.page
if options.thumbnail:
f = open(options.thumbnail, "r")
lrf.thumbnail = f.read()
f.close()
@safe
def unpack(self, fmt=DWORD, start=0):
    """
    Return decoded data from file.

    @param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>}
    @param start: Position in file from which to decode
    @return: The tuple produced by C{struct.unpack}
    """
    # A single seek is enough, and the number of bytes to read is simply
    # the size of the format.
    self._file.seek(start)
    return struct.unpack(fmt, self._file.read(struct.calcsize(fmt)))
@safe
def pack(self, *args, **kwargs):
    """
    Encode C{args} and write them to file.
    C{kwargs} must contain the keywords C{fmt} and C{start}

    @param args: The values to pack
    @param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>}
    @param start: Position in file at which to write encoded data
    """
    payload = struct.pack(kwargs["fmt"], *args)
    target = kwargs["start"]
    self._file.seek(target)
    self._file.write(payload)
    self._file.flush()
def thumbail_extension(self):
    """
    Return the file name extension that goes with C{thumbnail_type}.
    Any type other than 0x11, 0x12 and 0x13 yields "gif".
    """
    known = {0x11: "jpeg", 0x12: "png", 0x13: "bm"}
    return known.get(self.thumbnail_type, "gif")
if options.get_thumbnail:
t = lrf.thumbnail
td = "None"
if t and len(t) > 0:
td = os.path.basename(args[0])+"_thumbnail_."+lrf.thumbail_extension()
f = open(td, "w")
f.write(t)
f.close()
fields = LRFMetaFile.__dict__.items()
for f in fields:
if "XML" in str(f):
print str(f[1]) + ":", lrf.__getattribute__(f[0])
if options.get_thumbnail: print "Thumbnail:", td
def main():
import sys, os.path
from optparse import OptionParser
from libprs500 import __version__ as VERSION
parser = OptionParser(usage="usage: %prog [options] mybook.lrf\n\
\nWARNING: Based on reverse engineering the LRF format."+\
" Making changes may render your LRF file unreadable. ", \
version=VERSION)
parser.add_option("-t", "--title", action="store", type="string", \
dest="title", help="Set the book title")
parser.add_option("-a", "--author", action="store", type="string", \
dest="author", help="Set the author")
parser.add_option("-c", "--category", action="store", type="string", \
dest="category", help="The category this book belongs"+\
" to. E.g.: History")
parser.add_option("--thumbnail", action="store", type="string", \
dest="thumbnail", help="Path to a graphic that will be"+\
" set as this files' thumbnail")
parser.add_option("--get-thumbnail", action="store_true", \
dest="get_thumbnail", default=False, \
help="Extract thumbnail from LRF file")
parser.add_option("-p", "--page", action="store", type="string", \
dest="page", help="Don't know what this is for")
options, args = parser.parse_args()
if len(args) != 1:
parser.print_help()
sys.exit(1)
lrf = LRFMetaFile(open(args[0], "r+b"))
if options.title:
lrf.title = options.title
if options.author:
lrf.author = options.author
if options.category:
lrf.category = options.category
if options.page:
lrf.page = options.page
if options.thumbnail:
f = open(options.thumbnail, "r")
lrf.thumbnail = f.read()
f.close()
if options.get_thumbnail:
t = lrf.thumbnail
td = "None"
if t and len(t) > 0:
td = os.path.basename(args[0])+"_thumbnail_."+lrf.thumbail_extension()
f = open(td, "w")
f.write(t)
f.close()
fields = LRFMetaFile.__dict__.items()
for f in fields:
if "XML" in str(f):
print str(f[1]) + ":", lrf.__getattribute__(f[0])
if options.get_thumbnail:
print "Thumbnail:", td
# This turns overflow warnings into errors.
# The filter makes every DeprecationWarning raise an exception; it is
# installed for the whole process and is not limited to one module.
# NOTE(review): presumably the "overflow warnings" are the ones struct
# emits when a value does not fit its format - confirm.
import warnings
warnings.simplefilter("error", DeprecationWarning)