From c1c0099354fdf8a0746e13c2deeca84eee334120 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 15 Jan 2013 09:16:09 +0530
Subject: [PATCH] LIT Input: Handle lit files that set an incorrect XML mimetype for their text. Fixes #1099621 (problem converting .lit files)

---
 src/calibre/ebooks/lit/reader.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py
index a673de87d7..98b230e5bb 100644
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@@ -11,13 +11,17 @@ import struct, os, functools, re
 from urlparse import urldefrag
 from cStringIO import StringIO
 from urllib import unquote as urlunquote
+
+from lxml import etree
+
 from calibre.ebooks.lit import LitError
 from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
 import calibre.ebooks.lit.mssha1 as mssha1
-from calibre.ebooks.oeb.base import urlnormalize
+from calibre.ebooks.oeb.base import urlnormalize, xpath
 from calibre.ebooks.oeb.reader import OEBReader
 from calibre.ebooks import DRMError
 from calibre import plugins
+
 lzx, lxzerror = plugins['lzx']
 msdes, msdeserror = plugins['msdes']
 
@@ -907,3 +911,16 @@ class LitReader(OEBReader):
     Container = LitContainer
     DEFAULT_PROFILE = 'MSReader'
 
+    def _spine_from_opf(self, opf):
+        manifest = self.oeb.manifest
+        for elem in xpath(opf, '/o2:package/o2:spine/o2:itemref'):
+            idref = elem.get('idref')
+            if idref not in manifest.ids:
+                continue
+            item = manifest.ids[idref]
+            if (item.media_type.lower() == 'application/xml' and
+                hasattr(item.data, 'xpath') and item.data.xpath('/html')):
+                item.media_type = 'application/xhtml+xml'
+                item.data = item._parse_xhtml(etree.tostring(item.data))
+        super(LitReader, self)._spine_from_opf(opf)
+