diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 490127357a..e2bb6f6d5f 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -28,7 +28,7 @@ from calibre import sanitize_file_name class EXTHHeader(object): - def __init__(self, raw, codec): + def __init__(self, raw, codec, title): self.doctype = raw[:4] self.length, self.num_items = struct.unpack('>LL', raw[4:12]) raw = raw[12:] @@ -52,17 +52,9 @@ class EXTHHeader(object): self.thumbnail_offset, = struct.unpack('>L', content) #else: # print 'unknown record', id, repr(content) - title = re.search(r'\0+([^\0]+)\0+', raw[pos:]) if title: - title = title.group(1).decode(codec, 'replace') - if len(title) > 2: - self.mi.title = title - else: - title = re.search(r'\0+([^\0]+)\0+', ''.join(reversed(raw[pos:]))) - if title: - self.mi.title = ''.join(reversed(title.group(1).decode(codec, 'replace'))) - - + self.mi.title = title + def process_metadata(self, id, content, codec): if id == 100: if self.mi.authors == [_('Unknown')]: @@ -121,6 +113,9 @@ class BookHeader(object): if self.compression_type == 'DH': self.huff_offset, self.huff_number = struct.unpack('>LL', raw[0x70:0x78]) + toff, tlen = struct.unpack('>II', raw[0x54:0x5c]) + tend = toff + tlen + self.title = raw[toff:tend] if tend < len(raw) else _('Unknown') langcode = struct.unpack('!L', raw[0x5C:0x60])[0] langid = langcode & 0xFF sublangid = (langcode >> 10) & 0xFF @@ -131,7 +126,7 @@ class BookHeader(object): self.exth_flag, = struct.unpack('>L', raw[0x80:0x84]) self.exth = None if self.exth_flag & 0x40: - self.exth = EXTHHeader(raw[16+self.length:], self.codec) + self.exth = EXTHHeader(raw[16+self.length:], self.codec, self.title) self.exth.mi.uid = self.unique_id self.exth.mi.language = self.language diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 3c5a39ebd2..92ecdf1a46 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -23,6 +23,7 @@ from PIL import Image from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \ OEB_RASTER_IMAGES from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname +from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.base import Logger, OEBBook from calibre.ebooks.oeb.profile import Context from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener @@ -178,7 +179,7 @@ class Serializer(object): def serialize_href(self, href, base=None): hrefs = self.oeb.manifest.hrefs - path, frag = urldefrag(href) + path, frag = urldefrag(urlnormalize(href)) if path and base: path = base.abshref(path) if path and path not in hrefs: diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 61a41443bc..927592ac0e 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -447,7 +447,7 @@ class Manifest(object): return cmp(skey, okey) def relhref(self, href): - if '/' not in self.href: + if '/' not in self.href or ':' in href: return href base = os.path.dirname(self.href).split('/') target, frag = urldefrag(href) @@ -463,7 +463,7 @@ class Manifest(object): return relhref def abshref(self, href): - if '/' not in self.href: + if '/' not in self.href or ':' in href: return href dirname = os.path.dirname(self.href) href = os.path.join(dirname, href) @@ -796,6 +796,10 @@ class TOC(object): class OEBBook(object): + COVER_SVG_XP = XPath('h:body//svg:svg[position() = 1]') + COVER_OBJECT_XP = XPath('h:body//h:object[@data][position() = 1]') + COVER_IMG_XP = XPath('h:body//h:img[@src][position() = 1]') + def __init__(self, opfpath=None, container=None, encoding=None, logger=FauxLogger()): if opfpath and not container: @@ -971,7 +975,7 @@ class OEBBook(object): ncx = item.data self.manifest.remove(item) title = xpath(ncx, 'ncx:docTitle/ncx:text/text()') - title = title[0].strip() if title else unicode(self.metadata.title) + title = title[0].strip() if title else unicode(self.metadata.title[0]) self.toc = toc = TOC(title) navmaps = xpath(ncx, 'ncx:navMap') for navmap in navmaps: @@ -1051,42 +1055,56 @@ class OEBBook(object): if self._toc_from_html(opf): return self._toc_from_spine(opf) - def _ensure_cover_image(self): - cover = None + def _locate_cover_image(self): + if self.metadata.cover: + id = str(self.metadata.cover[0]) + item = self.manifest.ids.get(id, None) + if item is not None: + return item hcover = self.spine[0] if 'cover' in self.guide: href = self.guide['cover'].href item = self.manifest.hrefs[href] media_type = item.media_type - if media_type in OEB_RASTER_IMAGES: - cover = item + if media_type in OEB_IMAGES: + return item elif media_type in OEB_DOCS: hcover = item html = hcover.data - if cover is not None: - pass - elif self.metadata.cover: - id = str(self.metadata.cover[0]) - cover = self.manifest.ids[id] - elif MS_COVER_TYPE in self.guide: + if MS_COVER_TYPE in self.guide: href = self.guide[MS_COVER_TYPE].href - cover = self.manifest.hrefs[href] - elif xpath(html, '//h:img[position()=1]'): - img = xpath(html, '//h:img[position()=1]')[0] - href = hcover.abshref(img.get('src')) - cover = self.manifest.hrefs[href] - elif xpath(html, '//h:object[position()=1]'): - object = xpath(html, '//h:object[position()=1]')[0] - href = hcover.abshref(object.get('data')) - cover = self.manifest.hrefs[href] - elif xpath(html, '//svg:svg[position()=1]'): - svg = copy.deepcopy(xpath(html, '//svg:svg[position()=1]')[0]) + item = self.manifest.hrefs.get(href, None) + if item is not None and item.media_type in OEB_IMAGES: + return item + if self.COVER_SVG_XP(html): + svg = copy.deepcopy(self.COVER_SVG_XP(html)[0]) href = os.path.splitext(hcover.href)[0] + '.svg' id, href = self.manifest.generate(hcover.id, href) - cover = self.manifest.add(id, href, SVG_MIME, data=svg) - if cover and not self.metadata.cover: - self.metadata.add('cover', cover.id) - + item = self.manifest.add(id, href, SVG_MIME, data=svg) + return item + if self.COVER_OBJECT_XP(html): + object = self.COVER_OBJECT_XP(html)[0] + href = hcover.abshref(object.get('data')) + item = self.manifest.hrefs.get(href, None) + if item is not None and item.media_type in OEB_IMAGES: + return item + if self.COVER_IMG_XP(html): + img = self.COVER_IMG_XP(html)[0] + href = hcover.abshref(img.get('src')) + item = self.manifest.hrefs.get(href, None) + if item is not None and item.media_type in OEB_IMAGES: + return item + return None + + def _ensure_cover_image(self): + cover = self._locate_cover_image() + if not cover: + return + if self.metadata.cover: + self.metadata.cover[0].value = cover.id + return + self.metadata.add('cover', cover.id) + def _all_from_opf(self, opf): self._metadata_from_opf(opf) self._manifest_from_opf(opf) diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py index 97d73d3dcb..12a2812898 100644 --- a/src/calibre/ebooks/oeb/transforms/rasterize.py +++ b/src/calibre/ebooks/oeb/transforms/rasterize.py @@ -23,6 +23,7 @@ from PyQt4.QtGui import QApplication from calibre.ebooks.oeb.base import XHTML_NS, XHTML, SVG_NS, SVG, XLINK from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME, JPEG_MIME from calibre.ebooks.oeb.base import xml2str, xpath, namespace, barename +from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.stylizer import Stylizer IMAGE_TAGS = set([XHTML('img'), XHTML('object')]) @@ -78,7 +79,7 @@ class SVGRasterizer(object): svg = item.data hrefs = self.oeb.manifest.hrefs for elem in xpath(svg, '//svg:*[@xl:href]'): - href = elem.attrib[XLINK('href')] + href = urlnormalize(elem.attrib[XLINK('href')]) path, frag = urldefrag(href) if not path: continue @@ -100,15 +101,15 @@ class SVGRasterizer(object): def rasterize_item(self, item, stylizer): html = item.data hrefs = self.oeb.manifest.hrefs - for elem in xpath(html, '//h:img'): - src = elem.get('src', None) - image = hrefs.get(item.abshref(src), None) if src else None + for elem in xpath(html, '//h:img[@src]'): + src = urlnormalize(elem.attrib['src']) + image = hrefs.get(item.abshref(src), None) if image and image.media_type == SVG_MIME: style = stylizer.style(elem) self.rasterize_external(elem, style, item, image) - for elem in xpath(html, '//h:object[@type="%s"]' % SVG_MIME): - data = elem.get('data', None) - image = hrefs.get(item.abshref(data), None) if data else None + for elem in xpath(html, '//h:object[@type="%s" and @data]' % SVG_MIME): + data = urlnormalize(elem.attrib['data']) + image = hrefs.get(item.abshref(data), None) if image and image.media_type == SVG_MIME: style = stylizer.style(elem) self.rasterize_external(elem, style, item, image) diff --git a/src/calibre/ebooks/oeb/transforms/trimmanifest.py b/src/calibre/ebooks/oeb/transforms/trimmanifest.py index bc95b43343..643952c03d 100644 --- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py +++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py @@ -54,7 +54,7 @@ class ManifestTrimmer(object): new.add(found) elif item.media_type == CSS_MIME: def replacer(uri): - absuri = item.abshref(uri) + absuri = item.abshref(urlnormalize(uri)) if absuri in oeb.manifest.hrefs: found = oeb.manifest.hrefs[href] if found not in used: