mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Fix #1712 (MOBI error dump from Mac) and make detection if title in MOBI files more robust
This commit is contained in:
commit
e82972af80
@ -28,7 +28,7 @@ from calibre import sanitize_file_name
|
||||
|
||||
class EXTHHeader(object):
|
||||
|
||||
def __init__(self, raw, codec):
|
||||
def __init__(self, raw, codec, title):
|
||||
self.doctype = raw[:4]
|
||||
self.length, self.num_items = struct.unpack('>LL', raw[4:12])
|
||||
raw = raw[12:]
|
||||
@ -52,17 +52,9 @@ class EXTHHeader(object):
|
||||
self.thumbnail_offset, = struct.unpack('>L', content)
|
||||
#else:
|
||||
# print 'unknown record', id, repr(content)
|
||||
title = re.search(r'\0+([^\0]+)\0+', raw[pos:])
|
||||
if title:
|
||||
title = title.group(1).decode(codec, 'replace')
|
||||
if len(title) > 2:
|
||||
self.mi.title = title
|
||||
else:
|
||||
title = re.search(r'\0+([^\0]+)\0+', ''.join(reversed(raw[pos:])))
|
||||
if title:
|
||||
self.mi.title = ''.join(reversed(title.group(1).decode(codec, 'replace')))
|
||||
|
||||
|
||||
self.mi.title = title
|
||||
|
||||
def process_metadata(self, id, content, codec):
|
||||
if id == 100:
|
||||
if self.mi.authors == [_('Unknown')]:
|
||||
@ -121,6 +113,9 @@ class BookHeader(object):
|
||||
if self.compression_type == 'DH':
|
||||
self.huff_offset, self.huff_number = struct.unpack('>LL', raw[0x70:0x78])
|
||||
|
||||
toff, tlen = struct.unpack('>II', raw[0x54:0x5c])
|
||||
tend = toff + tlen
|
||||
self.title = raw[toff:tend] if tend < len(raw) else _('Unknown')
|
||||
langcode = struct.unpack('!L', raw[0x5C:0x60])[0]
|
||||
langid = langcode & 0xFF
|
||||
sublangid = (langcode >> 10) & 0xFF
|
||||
@ -131,7 +126,7 @@ class BookHeader(object):
|
||||
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
|
||||
self.exth = None
|
||||
if self.exth_flag & 0x40:
|
||||
self.exth = EXTHHeader(raw[16+self.length:], self.codec)
|
||||
self.exth = EXTHHeader(raw[16+self.length:], self.codec, self.title)
|
||||
self.exth.mi.uid = self.unique_id
|
||||
self.exth.mi.language = self.language
|
||||
|
||||
|
@ -23,6 +23,7 @@ from PIL import Image
|
||||
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
|
||||
OEB_RASTER_IMAGES
|
||||
from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname
|
||||
from calibre.ebooks.oeb.base import urlnormalize
|
||||
from calibre.ebooks.oeb.base import Logger, OEBBook
|
||||
from calibre.ebooks.oeb.profile import Context
|
||||
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
|
||||
@ -178,7 +179,7 @@ class Serializer(object):
|
||||
|
||||
def serialize_href(self, href, base=None):
|
||||
hrefs = self.oeb.manifest.hrefs
|
||||
path, frag = urldefrag(href)
|
||||
path, frag = urldefrag(urlnormalize(href))
|
||||
if path and base:
|
||||
path = base.abshref(path)
|
||||
if path and path not in hrefs:
|
||||
|
@ -447,7 +447,7 @@ class Manifest(object):
|
||||
return cmp(skey, okey)
|
||||
|
||||
def relhref(self, href):
|
||||
if '/' not in self.href:
|
||||
if '/' not in self.href or ':' in href:
|
||||
return href
|
||||
base = os.path.dirname(self.href).split('/')
|
||||
target, frag = urldefrag(href)
|
||||
@ -463,7 +463,7 @@ class Manifest(object):
|
||||
return relhref
|
||||
|
||||
def abshref(self, href):
|
||||
if '/' not in self.href:
|
||||
if '/' not in self.href or ':' in href:
|
||||
return href
|
||||
dirname = os.path.dirname(self.href)
|
||||
href = os.path.join(dirname, href)
|
||||
@ -796,6 +796,10 @@ class TOC(object):
|
||||
|
||||
|
||||
class OEBBook(object):
|
||||
COVER_SVG_XP = XPath('h:body//svg:svg[position() = 1]')
|
||||
COVER_OBJECT_XP = XPath('h:body//h:object[@data][position() = 1]')
|
||||
COVER_IMG_XP = XPath('h:body//h:img[@src][position() = 1]')
|
||||
|
||||
def __init__(self, opfpath=None, container=None, encoding=None,
|
||||
logger=FauxLogger()):
|
||||
if opfpath and not container:
|
||||
@ -971,7 +975,7 @@ class OEBBook(object):
|
||||
ncx = item.data
|
||||
self.manifest.remove(item)
|
||||
title = xpath(ncx, 'ncx:docTitle/ncx:text/text()')
|
||||
title = title[0].strip() if title else unicode(self.metadata.title)
|
||||
title = title[0].strip() if title else unicode(self.metadata.title[0])
|
||||
self.toc = toc = TOC(title)
|
||||
navmaps = xpath(ncx, 'ncx:navMap')
|
||||
for navmap in navmaps:
|
||||
@ -1051,42 +1055,56 @@ class OEBBook(object):
|
||||
if self._toc_from_html(opf): return
|
||||
self._toc_from_spine(opf)
|
||||
|
||||
def _ensure_cover_image(self):
|
||||
cover = None
|
||||
def _locate_cover_image(self):
|
||||
if self.metadata.cover:
|
||||
id = str(self.metadata.cover[0])
|
||||
item = self.manifest.ids.get(id, None)
|
||||
if item is not None:
|
||||
return item
|
||||
hcover = self.spine[0]
|
||||
if 'cover' in self.guide:
|
||||
href = self.guide['cover'].href
|
||||
item = self.manifest.hrefs[href]
|
||||
media_type = item.media_type
|
||||
if media_type in OEB_RASTER_IMAGES:
|
||||
cover = item
|
||||
if media_type in OEB_IMAGES:
|
||||
return item
|
||||
elif media_type in OEB_DOCS:
|
||||
hcover = item
|
||||
html = hcover.data
|
||||
if cover is not None:
|
||||
pass
|
||||
elif self.metadata.cover:
|
||||
id = str(self.metadata.cover[0])
|
||||
cover = self.manifest.ids[id]
|
||||
elif MS_COVER_TYPE in self.guide:
|
||||
if MS_COVER_TYPE in self.guide:
|
||||
href = self.guide[MS_COVER_TYPE].href
|
||||
cover = self.manifest.hrefs[href]
|
||||
elif xpath(html, '//h:img[position()=1]'):
|
||||
img = xpath(html, '//h:img[position()=1]')[0]
|
||||
href = hcover.abshref(img.get('src'))
|
||||
cover = self.manifest.hrefs[href]
|
||||
elif xpath(html, '//h:object[position()=1]'):
|
||||
object = xpath(html, '//h:object[position()=1]')[0]
|
||||
href = hcover.abshref(object.get('data'))
|
||||
cover = self.manifest.hrefs[href]
|
||||
elif xpath(html, '//svg:svg[position()=1]'):
|
||||
svg = copy.deepcopy(xpath(html, '//svg:svg[position()=1]')[0])
|
||||
item = self.manifest.hrefs.get(href, None)
|
||||
if item is not None and item.media_type in OEB_IMAGES:
|
||||
return item
|
||||
if self.COVER_SVG_XP(html):
|
||||
svg = copy.deepcopy(self.COVER_SVG_XP(html)[0])
|
||||
href = os.path.splitext(hcover.href)[0] + '.svg'
|
||||
id, href = self.manifest.generate(hcover.id, href)
|
||||
cover = self.manifest.add(id, href, SVG_MIME, data=svg)
|
||||
if cover and not self.metadata.cover:
|
||||
self.metadata.add('cover', cover.id)
|
||||
|
||||
item = self.manifest.add(id, href, SVG_MIME, data=svg)
|
||||
return item
|
||||
if self.COVER_OBJECT_XP(html):
|
||||
object = self.COVER_OBJECT_XP(html)[0]
|
||||
href = hcover.abshref(object.get('data'))
|
||||
item = self.manifest.hrefs.get(href, None)
|
||||
if item is not None and item.media_type in OEB_IMAGES:
|
||||
return item
|
||||
if self.COVER_IMG_XP(html):
|
||||
img = self.COVER_IMG_XP(html)[0]
|
||||
href = hcover.abshref(img.get('src'))
|
||||
item = self.manifest.hrefs.get(href, None)
|
||||
if item is not None and item.media_type in OEB_IMAGES:
|
||||
return item
|
||||
return None
|
||||
|
||||
def _ensure_cover_image(self):
|
||||
cover = self._locate_cover_image()
|
||||
if not cover:
|
||||
return
|
||||
if self.metadata.cover:
|
||||
self.metadata.cover[0].value = cover.id
|
||||
return
|
||||
self.metadata.add('cover', cover.id)
|
||||
|
||||
def _all_from_opf(self, opf):
|
||||
self._metadata_from_opf(opf)
|
||||
self._manifest_from_opf(opf)
|
||||
|
@ -23,6 +23,7 @@ from PyQt4.QtGui import QApplication
|
||||
from calibre.ebooks.oeb.base import XHTML_NS, XHTML, SVG_NS, SVG, XLINK
|
||||
from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME, JPEG_MIME
|
||||
from calibre.ebooks.oeb.base import xml2str, xpath, namespace, barename
|
||||
from calibre.ebooks.oeb.base import urlnormalize
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
|
||||
IMAGE_TAGS = set([XHTML('img'), XHTML('object')])
|
||||
@ -78,7 +79,7 @@ class SVGRasterizer(object):
|
||||
svg = item.data
|
||||
hrefs = self.oeb.manifest.hrefs
|
||||
for elem in xpath(svg, '//svg:*[@xl:href]'):
|
||||
href = elem.attrib[XLINK('href')]
|
||||
href = urlnormalize(elem.attrib[XLINK('href')])
|
||||
path, frag = urldefrag(href)
|
||||
if not path:
|
||||
continue
|
||||
@ -100,15 +101,15 @@ class SVGRasterizer(object):
|
||||
def rasterize_item(self, item, stylizer):
|
||||
html = item.data
|
||||
hrefs = self.oeb.manifest.hrefs
|
||||
for elem in xpath(html, '//h:img'):
|
||||
src = elem.get('src', None)
|
||||
image = hrefs.get(item.abshref(src), None) if src else None
|
||||
for elem in xpath(html, '//h:img[@src]'):
|
||||
src = urlnormalize(elem.attrib['src'])
|
||||
image = hrefs.get(item.abshref(src), None)
|
||||
if image and image.media_type == SVG_MIME:
|
||||
style = stylizer.style(elem)
|
||||
self.rasterize_external(elem, style, item, image)
|
||||
for elem in xpath(html, '//h:object[@type="%s"]' % SVG_MIME):
|
||||
data = elem.get('data', None)
|
||||
image = hrefs.get(item.abshref(data), None) if data else None
|
||||
for elem in xpath(html, '//h:object[@type="%s" and @data]' % SVG_MIME):
|
||||
data = urlnormalize(elem.attrib['data'])
|
||||
image = hrefs.get(item.abshref(data), None)
|
||||
if image and image.media_type == SVG_MIME:
|
||||
style = stylizer.style(elem)
|
||||
self.rasterize_external(elem, style, item, image)
|
||||
|
@ -54,7 +54,7 @@ class ManifestTrimmer(object):
|
||||
new.add(found)
|
||||
elif item.media_type == CSS_MIME:
|
||||
def replacer(uri):
|
||||
absuri = item.abshref(uri)
|
||||
absuri = item.abshref(urlnormalize(uri))
|
||||
if absuri in oeb.manifest.hrefs:
|
||||
found = oeb.manifest.hrefs[href]
|
||||
if found not in used:
|
||||
|
Loading…
x
Reference in New Issue
Block a user