IGN: Read the publish date from MOBI books and replace the <content> tag with <div>. Also revert the writing of <mbp:section>, as it causes problems with some MOBI readers.

This commit is contained in:
Kovid Goyal 2009-03-28 17:21:27 -07:00
parent a40d47956f
commit 5e1e75f491
2 changed files with 14 additions and 7 deletions

View File

@@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Read data from .mobi files
'''
import sys, struct, os, cStringIO, re, functools
import sys, struct, os, cStringIO, re, functools, datetime
try:
from PIL import Image as PILImage
@@ -73,8 +73,14 @@ class EXTHHeader(object):
if not self.mi.tags:
self.mi.tags = []
self.mi.tags.append(content.decode(codec, 'ignore'))
#else:
# print 'unhandled metadata record', id, repr(content), codec
elif id == 106:
try:
self.mi.publish_date = datetime.datetime.strptime(
content, '%Y-%m-%d',).date()
except:
pass
else:
print 'unhandled metadata record', id, repr(content)
class BookHeader(object):
@@ -305,8 +311,8 @@ class MobiReader(object):
mobi_version = self.book_header.mobi_version
for tag in root.iter(etree.Element):
if tag.tag in ('country-region', 'place', 'placetype', 'placename',
'state', 'city', 'street', 'address'):
tag.tag = 'span'
'state', 'city', 'street', 'address', 'content'):
tag.tag = 'div' if tag.tag == 'content' else 'span'
for key in tag.attrib.keys():
tag.attrib.pop(key)
continue

View File

@@ -211,13 +211,14 @@ class Serializer(object):
def serialize_item(self, item):
buffer = self.buffer
buffer.write('<mbp:section>')
#buffer.write('<mbp:section>')
if not item.linear:
self.breaks.append(buffer.tell() - 1)
self.id_offsets[item.href] = buffer.tell()
for elem in item.data.find(XHTML('body')):
self.serialize_elem(elem, item)
buffer.write('</mbp:section></mbp:pagebreak>')
#buffer.write('</mbp:section>')
buffer.write('</mbp:pagebreak>')
def serialize_elem(self, elem, item, nsrmap=NSRMAP):
buffer = self.buffer