Fix #2750 ("Unknown" stays listed in author field when creating LRF via ebook-convert) and other minor typos in metadata processing

This commit is contained in:
Kovid Goyal 2009-07-12 21:35:54 -06:00
parent f1936e18da
commit f4841f4d9b
4 changed files with 146 additions and 134 deletions

View File

@ -2,9 +2,9 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
""" """
This module presents an easy to use interface for getting and setting This module presents an easy to use interface for getting and setting
meta information in LRF files. meta information in LRF files.
Just create an L{LRFMetaFile} object and use its properties Just create an L{LRFMetaFile} object and use its properties
to get and set meta information. For example: to get and set meta information. For example:
>>> lrf = LRFMetaFile("mybook.lrf") >>> lrf = LRFMetaFile("mybook.lrf")
@ -21,8 +21,8 @@ from functools import wraps
from calibre.devices.prs500.prstypes import field from calibre.devices.prs500.prstypes import field
from calibre.ebooks.metadata import MetaInformation, string_to_authors from calibre.ebooks.metadata import MetaInformation, string_to_authors
BYTE = "<B" #: Unsigned char little endian encoded in 1 byte BYTE = "<B" #: Unsigned char little endian encoded in 1 byte
WORD = "<H" #: Unsigned short little endian encoded in 2 bytes WORD = "<H" #: Unsigned short little endian encoded in 2 bytes
DWORD = "<I" #: Unsigned integer little endian encoded in 4 bytes DWORD = "<I" #: Unsigned integer little endian encoded in 4 bytes
QWORD = "<Q" #: Unsigned long long little endian encoded in 8 bytes QWORD = "<Q" #: Unsigned long long little endian encoded in 8 bytes
@ -30,20 +30,20 @@ class versioned_field(field):
def __init__(self, vfield, version, start=0, fmt=WORD): def __init__(self, vfield, version, start=0, fmt=WORD):
field.__init__(self, start=start, fmt=fmt) field.__init__(self, start=start, fmt=fmt)
self.vfield, self.version = vfield, version self.vfield, self.version = vfield, version
def enabled(self): def enabled(self):
return self.vfield > self.version return self.vfield > self.version
def __get__(self, obj, typ=None): def __get__(self, obj, typ=None):
if self.enabled(): if self.enabled():
return field.__get__(self, obj, typ=typ) return field.__get__(self, obj, typ=typ)
else: else:
return None return None
def __set__(self, obj, val): def __set__(self, obj, val):
if not self.enabled(): if not self.enabled():
raise LRFException("Trying to set disabled field") raise LRFException("Trying to set disabled field")
else: else:
field.__set__(self, obj, val) field.__set__(self, obj, val)
class LRFException(Exception): class LRFException(Exception):
@ -53,23 +53,23 @@ class fixed_stringfield(object):
""" A field storing a variable length string. """ """ A field storing a variable length string. """
def __init__(self, length=8, start=0): def __init__(self, length=8, start=0):
""" """
@param length: Size of this string @param length: Size of this string
@param start: The byte at which this field is stored in the buffer @param start: The byte at which this field is stored in the buffer
""" """
self._length = length self._length = length
self._start = start self._start = start
def __get__(self, obj, typ=None): def __get__(self, obj, typ=None):
length = str(self._length) length = str(self._length)
return obj.unpack(start=self._start, fmt="<"+length+"s")[0] return obj.unpack(start=self._start, fmt="<"+length+"s")[0]
def __set__(self, obj, val): def __set__(self, obj, val):
if val.__class__.__name__ != 'str': val = str(val) if val.__class__.__name__ != 'str': val = str(val)
if len(val) != self._length: if len(val) != self._length:
raise LRFException("Trying to set fixed_stringfield with a " + \ raise LRFException("Trying to set fixed_stringfield with a " + \
"string of incorrect length") "string of incorrect length")
obj.pack(val, start=self._start, fmt="<"+str(len(val))+"s") obj.pack(val, start=self._start, fmt="<"+str(len(val))+"s")
def __repr__(self): def __repr__(self):
return "A string of length " + str(self._length) + \ return "A string of length " + str(self._length) + \
" starting at byte " + str(self._start) " starting at byte " + str(self._start)
@ -79,7 +79,7 @@ class xml_attr_field(object):
self.tag_name = tag_name self.tag_name = tag_name
self.parent = parent self.parent = parent
self.attr= attr self.attr= attr
def __get__(self, obj, typ=None): def __get__(self, obj, typ=None):
""" Return the data in this field or '' if the field is empty """ """ Return the data in this field or '' if the field is empty """
document = obj.info document = obj.info
@ -92,7 +92,7 @@ class xml_attr_field(object):
if elem and elem.hasAttribute(self.attr): if elem and elem.hasAttribute(self.attr):
return elem.getAttribute(self.attr) return elem.getAttribute(self.attr)
return '' return ''
def __set__(self, obj, val): def __set__(self, obj, val):
if val == None: if val == None:
val = "" val = ""
@ -106,31 +106,31 @@ class xml_attr_field(object):
if elem: if elem:
elem.setAttribute(self.attr, val) elem.setAttribute(self.attr, val)
obj.info = document obj.info = document
def __repr__(self): def __repr__(self):
return "XML Attr Field: " + self.tag_name + " in " + self.parent return "XML Attr Field: " + self.tag_name + " in " + self.parent
def __str__(self): def __str__(self):
return self.tag_name+'.'+self.attr return self.tag_name+'.'+self.attr
class xml_field(object): class xml_field(object):
""" """
Descriptor that gets and sets XML based meta information from an LRF file. Descriptor that gets and sets XML based meta information from an LRF file.
Works for simple XML fields of the form <tagname>data</tagname> Works for simple XML fields of the form <tagname>data</tagname>
""" """
def __init__(self, tag_name, parent="BookInfo"): def __init__(self, tag_name, parent="BookInfo"):
""" """
@param tag_name: The XML tag whose data we operate on @param tag_name: The XML tag whose data we operate on
@param parent: The tagname of the parent element of C{tag_name} @param parent: The tagname of the parent element of C{tag_name}
""" """
self.tag_name = tag_name self.tag_name = tag_name
self.parent = parent self.parent = parent
def __get__(self, obj, typ=None): def __get__(self, obj, typ=None):
""" Return the data in this field or '' if the field is empty """ """ Return the data in this field or '' if the field is empty """
document = obj.info document = obj.info
elems = document.getElementsByTagName(self.tag_name) elems = document.getElementsByTagName(self.tag_name)
if len(elems): if len(elems):
elem = None elem = None
@ -138,27 +138,27 @@ class xml_field(object):
if candidate.parentNode.nodeName == self.parent: if candidate.parentNode.nodeName == self.parent:
elem = candidate elem = candidate
if elem: if elem:
elem.normalize() elem.normalize()
if elem.hasChildNodes(): if elem.hasChildNodes():
return elem.firstChild.data.strip() return elem.firstChild.data.strip()
return '' return ''
def __set__(self, obj, val): def __set__(self, obj, val):
if not val: if not val:
val = '' val = ''
document = obj.info document = obj.info
def create_elem(): def create_elem():
elem = document.createElement(self.tag_name) elem = document.createElement(self.tag_name)
parent = document.getElementsByTagName(self.parent)[0] parent = document.getElementsByTagName(self.parent)[0]
parent.appendChild(elem) parent.appendChild(elem)
return elem return elem
if not val: if not val:
val = u'' val = u''
if type(val).__name__ != 'unicode': if type(val).__name__ != 'unicode':
val = unicode(val, 'utf-8') val = unicode(val, 'utf-8')
elems = document.getElementsByTagName(self.tag_name) elems = document.getElementsByTagName(self.tag_name)
elem = None elem = None
if len(elems): if len(elems):
@ -169,29 +169,29 @@ class xml_field(object):
elem = create_elem() elem = create_elem()
else: else:
elem.normalize() elem.normalize()
while elem.hasChildNodes(): while elem.hasChildNodes():
elem.removeChild(elem.lastChild) elem.removeChild(elem.lastChild)
else: else:
elem = create_elem() elem = create_elem()
elem.appendChild(document.createTextNode(val)) elem.appendChild(document.createTextNode(val))
obj.info = document obj.info = document
def __str__(self): def __str__(self):
return self.tag_name return self.tag_name
def __repr__(self): def __repr__(self):
return "XML Field: " + self.tag_name + " in " + self.parent return "XML Field: " + self.tag_name + " in " + self.parent
def insert_into_file(fileobj, data, start, end): def insert_into_file(fileobj, data, start, end):
""" """
Insert data into fileobj at position C{start}. Insert data into fileobj at position C{start}.
This function inserts data into a file, overwriting all data between start This function inserts data into a file, overwriting all data between start
and end. If end == start no data is overwritten. Do not use this function to and end. If end == start no data is overwritten. Do not use this function to
append data to a file. append data to a file.
@param fileobj: file like object @param fileobj: file like object
@param data: data to be inserted into fileobj @param data: data to be inserted into fileobj
@param start: The position at which to start inserting data @param start: The position at which to start inserting data
@ -216,7 +216,7 @@ def insert_into_file(fileobj, data, start, end):
def get_metadata(stream): def get_metadata(stream):
""" """
Return basic meta-data about the LRF file in C{stream} as a Return basic meta-data about the LRF file in C{stream} as a
L{MetaInformation} object. L{MetaInformation} object.
@param stream: A file like object or an instance of L{LRFMetaFile} @param stream: A file like object or an instance of L{LRFMetaFile}
""" """
@ -229,6 +229,8 @@ def get_metadata(stream):
tags = [x.strip() for x in mi.category.split(',') if x.strip()] tags = [x.strip() for x in mi.category.split(',') if x.strip()]
if tags: if tags:
mi.tags = tags mi.tags = tags
if mi.category.strip() == ',':
mi.category = None
mi.publisher = lrf.publisher.strip() mi.publisher = lrf.publisher.strip()
mi.cover_data = lrf.get_cover() mi.cover_data = lrf.get_cover()
try: try:
@ -254,14 +256,14 @@ def get_metadata(stream):
if not mi.publisher or 'unknown' in mi.publisher.lower() or \ if not mi.publisher or 'unknown' in mi.publisher.lower() or \
'some publisher' in mi.publisher.lower(): 'some publisher' in mi.publisher.lower():
mi.publisher = None mi.publisher = None
return mi return mi
class LRFMetaFile(object): class LRFMetaFile(object):
""" Has properties to read and write all Meta information in a LRF file. """ """ Has properties to read and write all Meta information in a LRF file. """
#: The first 6 bytes of all valid LRF files #: The first 6 bytes of all valid LRF files
LRF_HEADER = 'LRF'.encode('utf-16le') LRF_HEADER = 'LRF'.encode('utf-16le')
lrf_header = fixed_stringfield(length=6, start=0x0) lrf_header = fixed_stringfield(length=6, start=0x0)
version = field(fmt=WORD, start=0x8) version = field(fmt=WORD, start=0x8)
xor_key = field(fmt=WORD, start=0xa) xor_key = field(fmt=WORD, start=0xa)
@ -280,9 +282,9 @@ class LRFMetaFile(object):
thumbnail_size = versioned_field(version, 800, fmt=DWORD, start=0x50) thumbnail_size = versioned_field(version, 800, fmt=DWORD, start=0x50)
uncompressed_info_size = versioned_field(compressed_info_size, 0, \ uncompressed_info_size = versioned_field(compressed_info_size, 0, \
fmt=DWORD, start=0x54) fmt=DWORD, start=0x54)
title = xml_field("Title", parent="BookInfo") title = xml_field("Title", parent="BookInfo")
title_reading = xml_attr_field("Title", 'reading', parent="BookInfo") title_reading = xml_attr_field("Title", 'reading', parent="BookInfo")
author = xml_field("Author", parent="BookInfo") author = xml_field("Author", parent="BookInfo")
author_reading = xml_attr_field("Author", 'reading', parent="BookInfo") author_reading = xml_attr_field("Author", 'reading', parent="BookInfo")
# 16 characters. First two chars should be FB for personal use ebooks. # 16 characters. First two chars should be FB for personal use ebooks.
@ -296,17 +298,17 @@ class LRFMetaFile(object):
language = xml_field("Language", parent="DocInfo") language = xml_field("Language", parent="DocInfo")
creator = xml_field("Creator", parent="DocInfo") creator = xml_field("Creator", parent="DocInfo")
# Format is %Y-%m-%d # Format is %Y-%m-%d
creation_date = xml_field("CreationDate", parent="DocInfo") creation_date = xml_field("CreationDate", parent="DocInfo")
producer = xml_field("Producer", parent="DocInfo") producer = xml_field("Producer", parent="DocInfo")
page = xml_field("SumPage", parent="DocInfo") page = xml_field("SumPage", parent="DocInfo")
def safe(func): def safe(func):
""" """
Decorator that ensures that function calls leave the pos Decorator that ensures that function calls leave the pos
in the underlying file unchanged in the underlying file unchanged
""" """
@wraps(func) @wraps(func)
def restore_pos(*args, **kwargs): def restore_pos(*args, **kwargs):
obj = args[0] obj = args[0]
pos = obj._file.tell() pos = obj._file.tell()
res = func(*args, **kwargs) res = func(*args, **kwargs)
@ -315,44 +317,44 @@ class LRFMetaFile(object):
obj._file.seek(pos) obj._file.seek(pos)
return res return res
return restore_pos return restore_pos
def safe_property(func):
    """
    Build a property whose getter and setter leave the position in the
    underlying file unchanged.

    @param func: A callable taking no arguments that returns a dict of
                 keyword arguments for C{property}. The C{fget} and
                 C{fset} entries, when present, are wrapped.
    @return: The resulting C{property} object
    """
    def decorator(f):
        @wraps(f)
        def restore_pos(*args, **kwargs):
            obj = args[0]
            pos = obj._file.tell()
            try:
                return f(*args, **kwargs)
            finally:
                # Restore the position even when the accessor raises.
                # If the file is now shorter than the saved position it
                # is left at the end of the file instead.
                obj._file.seek(0, 2)
                if obj._file.tell() >= pos:
                    obj._file.seek(pos)
        return restore_pos

    locals_ = func()
    # `in` works on every Python version; dict.has_key() was removed in 3.x
    for name in ("fget", "fset"):
        if name in locals_:
            locals_[name] = decorator(locals_[name])
    return property(**locals_)
@safe_property @safe_property
def info(): def info():
doc = \ doc = \
""" """
Document meta information as a minidom Document object. Document meta information as a minidom Document object.
To set use a minidom document object. To set use a minidom document object.
""" """
def fget(self): def fget(self):
if self.compressed_info_size == 0: if self.compressed_info_size == 0:
raise LRFException("This document has no meta info") raise LRFException("This document has no meta info")
size = self.compressed_info_size - 4 size = self.compressed_info_size - 4
self._file.seek(self.info_start) self._file.seek(self.info_start)
try: try:
src = zlib.decompress(self._file.read(size)) src = zlib.decompress(self._file.read(size))
if len(src) != self.uncompressed_info_size: if len(src) != self.uncompressed_info_size:
raise LRFException("Decompression of document meta info\ raise LRFException("Decompression of document meta info\
yielded unexpected results") yielded unexpected results")
try: try:
@ -365,7 +367,7 @@ class LRFMetaFile(object):
return dom.parseString(src.encode('utf-8')) return dom.parseString(src.encode('utf-8'))
except zlib.error: except zlib.error:
raise LRFException("Unable to decompress document meta information") raise LRFException("Unable to decompress document meta information")
def fset(self, document): def fset(self, document):
info = document.toxml('utf-8') info = document.toxml('utf-8')
self.uncompressed_info_size = len(info) self.uncompressed_info_size = len(info)
@ -374,50 +376,50 @@ class LRFMetaFile(object):
self.compressed_info_size = len(stream) + 4 self.compressed_info_size = len(stream) + 4
delta = insert_into_file(self._file, stream, self.info_start, \ delta = insert_into_file(self._file, stream, self.info_start, \
self.info_start + orig_size - 4) self.info_start + orig_size - 4)
if self.toc_object_offset > 0: if self.toc_object_offset > 0:
self.toc_object_offset += delta self.toc_object_offset += delta
self.object_index_offset += delta self.object_index_offset += delta
self.update_object_offsets(delta) self.update_object_offsets(delta)
return { "fget":fget, "fset":fset, "doc":doc } return { "fget":fget, "fset":fset, "doc":doc }
@safe_property @safe_property
def thumbnail_pos(): def thumbnail_pos():
doc = """ The position of the thumbnail in the LRF file """ doc = """ The position of the thumbnail in the LRF file """
def fget(self): def fget(self):
return self.info_start + self.compressed_info_size-4 return self.info_start + self.compressed_info_size-4
return { "fget":fget, "doc":doc } return { "fget":fget, "doc":doc }
@classmethod @classmethod
def _detect_thumbnail_type(cls, slice): def _detect_thumbnail_type(cls, slice):
""" @param slice: The first 16 bytes of the thumbnail """ """ @param slice: The first 16 bytes of the thumbnail """
ttype = 0x14 # GIF ttype = 0x14 # GIF
if "PNG" in slice: if "PNG" in slice:
ttype = 0x12 ttype = 0x12
if "BM" in slice: if "BM" in slice:
ttype = 0x13 ttype = 0x13
if "JFIF" in slice: if "JFIF" in slice:
ttype = 0x11 ttype = 0x11
return ttype return ttype
@safe_property @safe_property
def thumbnail(): def thumbnail():
doc = \ doc = \
""" """
The thumbnail. The thumbnail.
Represented as a string. Represented as a string.
The string you would get from the file read function. The string you would get from the file read function.
""" """
def fget(self): def fget(self):
size = self.thumbnail_size size = self.thumbnail_size
if size: if size:
self._file.seek(self.thumbnail_pos) self._file.seek(self.thumbnail_pos)
return self._file.read(size) return self._file.read(size)
def fset(self, data): def fset(self, data):
if self.version <= 800: if self.version <= 800:
raise LRFException("Cannot store thumbnails in LRF files \ raise LRFException("Cannot store thumbnails in LRF files \
of version <= 800") of version <= 800")
slice = data[0:16] slice = data[0:16]
@ -425,13 +427,13 @@ class LRFMetaFile(object):
self.thumbnail_size = len(data) self.thumbnail_size = len(data)
delta = insert_into_file(self._file, data, self.thumbnail_pos, \ delta = insert_into_file(self._file, data, self.thumbnail_pos, \
self.thumbnail_pos + orig_size) self.thumbnail_pos + orig_size)
self.toc_object_offset += delta self.toc_object_offset += delta
self.object_index_offset += delta self.object_index_offset += delta
self.thumbnail_type = self._detect_thumbnail_type(slice) self.thumbnail_type = self._detect_thumbnail_type(slice)
self.update_object_offsets(delta) self.update_object_offsets(delta)
return { "fget":fget, "fset":fset, "doc":doc } return { "fget":fget, "fset":fset, "doc":doc }
def __init__(self, file): def __init__(self, file):
""" @param file: A file object opened in the r+b mode """ """ @param file: A file object opened in the r+b mode """
file.seek(0, 2) file.seek(0, 2)
@ -439,10 +441,10 @@ class LRFMetaFile(object):
self._file = file self._file = file
if self.lrf_header != LRFMetaFile.LRF_HEADER: if self.lrf_header != LRFMetaFile.LRF_HEADER:
raise LRFException(file.name + \ raise LRFException(file.name + \
" has an invalid LRF header. Are you sure it is an LRF file?") " has an invalid LRF header. Are you sure it is an LRF file?")
# Byte at which the compressed meta information starts # Byte at which the compressed meta information starts
self.info_start = 0x58 if self.version > 800 else 0x53 self.info_start = 0x58 if self.version > 800 else 0x53
@safe @safe
def update_object_offsets(self, delta): def update_object_offsets(self, delta):
""" Run through the LRF Object index changing the offset by C{delta}. """ """ Run through the LRF Object index changing the offset by C{delta}. """
@ -458,12 +460,12 @@ class LRFMetaFile(object):
self._file.seek(8, os.SEEK_CUR) self._file.seek(8, os.SEEK_CUR)
count -= 1 count -= 1
self._file.flush() self._file.flush()
@safe @safe
def unpack(self, fmt=DWORD, start=0): def unpack(self, fmt=DWORD, start=0):
""" """
Return decoded data from file. Return decoded data from file.
@param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>} @param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>}
@param start: Position in file from which to decode @param start: Position in file from which to decode
""" """
@ -471,62 +473,62 @@ class LRFMetaFile(object):
self._file.seek(start) self._file.seek(start)
ret = struct.unpack(fmt, self._file.read(end-start)) ret = struct.unpack(fmt, self._file.read(end-start))
return ret return ret
@safe
def pack(self, *args, **kwargs):
    """
    Serialise C{args} with C{struct} and write the result into the file.

    C{kwargs} must contain the keywords C{fmt} and C{start}

    @param args: The values to pack
    @param fmt: See U{struct<http://docs.python.org/lib/module-struct.html>}
    @param start: Position in file at which to write encoded data
    """
    payload = struct.pack(kwargs["fmt"], *args)
    target = self._file
    target.seek(kwargs["start"])
    target.write(payload)
    # Flush so the new bytes are not left sitting in the file object's buffer
    target.flush()
def thumbail_extension(self):
    """
    Return the file extension matching the thumbnail image type stored
    in L{self.thumbnail_type}. If the LRF file was created by buggy
    software, the extension may be incorrect. See
    L{self.fix_thumbnail_type}.
    """
    code = self.thumbnail_type
    # Type codes with a dedicated extension; anything else is treated as GIF
    for known, extension in ((0x11, "jpeg"), (0x12, "png"), (0x13, "bmp")):
        if code == known:
            return extension
    return "gif"
def fix_thumbnail_type(self):
    """
    Attempt to guess the thumbnail image format and set
    L{self.thumbnail_type} accordingly. Does nothing when the file has
    no thumbnail data.
    """
    data = self.thumbnail
    if not data:
        # The thumbnail getter yields None when no thumbnail size is
        # recorded; slicing None would raise a TypeError.
        return
    self.thumbnail_type = self._detect_thumbnail_type(data[0:16])
def seek(self, *args): def seek(self, *args):
""" See L{file.seek} """ """ See L{file.seek} """
return self._file.seek(*args) return self._file.seek(*args)
def tell(self): def tell(self):
""" See L{file.tell} """ """ See L{file.tell} """
return self._file.tell() return self._file.tell()
def read(self, size=-1):
    """
    See L{file.read}

    @param size: Maximum number of bytes to read. The default of -1
                 reads everything up to the end of the file.
    """
    return self._file.read(size)
def write(self, val): def write(self, val):
""" See L{file.write} """ """ See L{file.write} """
self._file.write(val) self._file.write(val)
def _objects(self): def _objects(self):
self._file.seek(self.object_index_offset) self._file.seek(self.object_index_offset)
c = self.number_of_objects c = self.number_of_objects
@ -536,7 +538,7 @@ class LRFMetaFile(object):
pos = self._file.tell() pos = self._file.tell()
yield struct.unpack('<IIII', raw)[:3] yield struct.unpack('<IIII', raw)[:3]
self._file.seek(pos) self._file.seek(pos)
def get_objects_by_type(self, type): def get_objects_by_type(self, type):
from calibre.ebooks.lrf.tags import Tag from calibre.ebooks.lrf.tags import Tag
objects = [] objects = []
@ -548,7 +550,7 @@ class LRFMetaFile(object):
if obj_type == type: if obj_type == type:
objects.append((obj_id, offset, size)) objects.append((obj_id, offset, size))
return objects return objects
def get_object_by_id(self, tid): def get_object_by_id(self, tid):
from calibre.ebooks.lrf.tags import Tag from calibre.ebooks.lrf.tags import Tag
for id, offset, size in self._objects(): for id, offset, size in self._objects():
@ -559,19 +561,19 @@ class LRFMetaFile(object):
if obj_id == tid: if obj_id == tid:
return obj_id, offset, size, obj_type return obj_id, offset, size, obj_type
return (False, False, False, False) return (False, False, False, False)
@safe @safe
def get_cover(self): def get_cover(self):
from calibre.ebooks.lrf.objects import get_object from calibre.ebooks.lrf.objects import get_object
for id, offset, size in self.get_objects_by_type(0x0C): for id, offset, size in self.get_objects_by_type(0x0C):
image = get_object(None, self._file, id, offset, size, self.xor_key) image = get_object(None, self._file, id, offset, size, self.xor_key)
id, offset, size = self.get_object_by_id(image.refstream)[:3] id, offset, size = self.get_object_by_id(image.refstream)[:3]
image_stream = get_object(None, self._file, id, offset, size, self.xor_key) image_stream = get_object(None, self._file, id, offset, size, self.xor_key)
return image_stream.file.rpartition('.')[-1], image_stream.stream return image_stream.file.rpartition('.')[-1], image_stream.stream
return None return None
def option_parser(): def option_parser():
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
@ -580,7 +582,7 @@ def option_parser():
_('''%prog [options] mybook.lrf _('''%prog [options] mybook.lrf
Show/edit the metadata in an LRF file.\n\n'''), Show/edit the metadata in an LRF file.\n\n'''),
version=__appname__+' '+__version__, version=__appname__+' '+__version__,
epilog='Created by Kovid Goyal') epilog='Created by Kovid Goyal')
parser.add_option("-t", "--title", action="store", type="string", \ parser.add_option("-t", "--title", action="store", type="string", \
@ -607,7 +609,7 @@ Show/edit the metadata in an LRF file.\n\n'''),
parser.add_option('--classification', default=None, help=_('Set the book classification')) parser.add_option('--classification', default=None, help=_('Set the book classification'))
parser.add_option('--creator', default=None, help=_('Set the book creator')) parser.add_option('--creator', default=None, help=_('Set the book creator'))
parser.add_option('--producer', default=None, help=_('Set the book producer')) parser.add_option('--producer', default=None, help=_('Set the book producer'))
parser.add_option('--get-cover', action='store_true', default=False, parser.add_option('--get-cover', action='store_true', default=False,
help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.')) help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
parser.add_option('--bookid', action='store', type='string', default=None, parser.add_option('--bookid', action='store', type='string', default=None,
dest='book_id', help=_('Set book ID')) dest='book_id', help=_('Set book ID'))
@ -615,7 +617,7 @@ Show/edit the metadata in an LRF file.\n\n'''),
# Basically, the total pages per the page size, font size, etc. when the LRF is first created. Since this will change as the book is reflowed, it is probably not worth using. # Basically, the total pages per the page size, font size, etc. when the LRF is first created. Since this will change as the book is reflowed, it is probably not worth using.
#parser.add_option("-p", "--page", action="store", type="string", \ #parser.add_option("-p", "--page", action="store", type="string", \
# dest="page", help=_("Don't know what this is for")) # dest="page", help=_("Don't know what this is for"))
return parser return parser
def set_metadata(stream, mi): def set_metadata(stream, mi):
@ -628,13 +630,13 @@ def set_metadata(stream, mi):
lrf.category = mi.tags[0] lrf.category = mi.tags[0]
if getattr(mi, 'category', False): if getattr(mi, 'category', False):
lrf.category = mi.category lrf.category = mi.category
if mi.comments: if mi.comments:
lrf.free_text = mi.comments lrf.free_text = mi.comments
if mi.author_sort: if mi.author_sort:
lrf.author_reading = mi.author_sort lrf.author_reading = mi.author_sort
if mi.publisher: if mi.publisher:
lrf.publisher = mi.publisher lrf.publisher = mi.publisher
def main(args=sys.argv): def main(args=sys.argv):
parser = option_parser() parser = option_parser()
@ -645,7 +647,7 @@ def main(args=sys.argv):
print 'No lrf file specified' print 'No lrf file specified'
return 1 return 1
lrf = LRFMetaFile(open(args[1], "r+b")) lrf = LRFMetaFile(open(args[1], "r+b"))
if options.title: if options.title:
lrf.title = options.title lrf.title = options.title
if options.title_reading != None: if options.title_reading != None:
@ -658,7 +660,7 @@ def main(args=sys.argv):
lrf.publisher = options.publisher lrf.publisher = options.publisher
if options.classification: if options.classification:
lrf.classification = options.classification lrf.classification = options.classification
if options.category: if options.category:
lrf.category = options.category lrf.category = options.category
if options.creator: if options.creator:
lrf.creator = options.creator lrf.creator = options.creator
@ -670,7 +672,7 @@ def main(args=sys.argv):
lrf.thumbnail = f.read() lrf.thumbnail = f.read()
f.close() f.close()
if options.book_id is not None: if options.book_id is not None:
lrf.book_id = options.book_id lrf.book_id = options.book_id
if options.comment: if options.comment:
path = os.path.expanduser(os.path.expandvars(options.comment)) path = os.path.expanduser(os.path.expandvars(options.comment))
lrf.free_text = open(path).read() lrf.free_text = open(path).read()
@ -682,13 +684,13 @@ def main(args=sys.argv):
f = open(td, "w") f = open(td, "w")
f.write(t) f.write(t)
f.close() f.close()
fields = LRFMetaFile.__dict__.items() fields = LRFMetaFile.__dict__.items()
fields.sort() fields.sort()
for f in fields: for f in fields:
if "XML" in str(f): if "XML" in str(f):
print str(f[1]) + ":", lrf.__getattribute__(f[0]).encode('utf-8') print str(f[1]) + ":", lrf.__getattribute__(f[0]).encode('utf-8')
if options.get_thumbnail: if options.get_thumbnail:
print "Thumbnail:", td print "Thumbnail:", td
if options.get_cover: if options.get_cover:
try: try:
@ -701,7 +703,7 @@ def main(args=sys.argv):
print 'Cover:', cover print 'Cover:', cover
else: else:
print 'Could not find cover in the LRF file' print 'Could not find cover in the LRF file'
if __name__ == '__main__': if __name__ == '__main__':
sys.exit(main()) sys.exit(main())

View File

@ -22,9 +22,18 @@ class LRFOptions(object):
m = oeb.metadata m = oeb.metadata
self.title = None self.title = None
self.author = self.publisher = _('Unknown') self.author = self.publisher = _('Unknown')
self.title_sort = self.author_sort = ''
for x in m.creator:
if x.role == 'aut':
self.author = unicode(x)
fa = unicode(getattr(x, 'file_as', ''))
if fa:
self.author_sort = fa
for x in m.title:
if unicode(x.file_as):
self.title_sort = unicode(x.file_as)
self.freetext = f2s(m.description) self.freetext = f2s(m.description)
self.category = f2s(m.tags) self.category = f2s(m.tags)
self.title_sort = self.author_sort = ''
self.cover = None self.cover = None
self.use_metadata_cover = True self.use_metadata_cover = True
self.output = output self.output = output

View File

@ -317,7 +317,7 @@ class MetaInformation(object):
if getattr(self, 'book_producer', False): if getattr(self, 'book_producer', False):
fmt('Book Producer', self.book_producer) fmt('Book Producer', self.book_producer)
if self.category: if self.category:
ans += u'Category : ' + unicode(self.category) + u'\n' fmt('Category', self.category)
if self.comments: if self.comments:
fmt('Comments', self.comments) fmt('Comments', self.comments)
if self.isbn: if self.isbn:

View File

@ -10,6 +10,7 @@ import os
from datetime import datetime from datetime import datetime
def meta_info_to_oeb_metadata(mi, m, log): def meta_info_to_oeb_metadata(mi, m, log):
from calibre.ebooks.oeb.base import OPF
if mi.title: if mi.title:
m.clear('title') m.clear('title')
m.add('title', mi.title) m.add('title', mi.title)
@ -22,7 +23,7 @@ def meta_info_to_oeb_metadata(mi, m, log):
for a in mi.authors: for a in mi.authors:
attrib = {'role':'aut'} attrib = {'role':'aut'}
if mi.author_sort: if mi.author_sort:
attrib['file_as'] = mi.author_sort attrib[OPF('file-as')] = mi.author_sort
m.add('creator', a, attrib=attrib) m.add('creator', a, attrib=attrib)
if mi.book_producer: if mi.book_producer:
m.filter('contributor', lambda x : x.role.lower() == 'bkp') m.filter('contributor', lambda x : x.role.lower() == 'bkp')