From 0598a2099f7a6af2825840a36183d10af4be9ffc Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 4 Jan 2014 13:01:47 +0530
Subject: [PATCH] Edit book: When checking book, check for entries in the OPF whose mimetype does not match the file extension.

---
 src/calibre/ebooks/oeb/polish/check/links.py | 53 ++++++++++++++++++++
 src/calibre/ebooks/oeb/polish/check/main.py  |  3 +-
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/oeb/polish/check/links.py b/src/calibre/ebooks/oeb/polish/check/links.py
index 28f44bac00..bdcd0332df 100644
--- a/src/calibre/ebooks/oeb/polish/check/links.py
+++ b/src/calibre/ebooks/oeb/polish/check/links.py
@@ -68,6 +68,59 @@ class Unmanifested(BadLink):
         container.remove_item(self.name)
         return True
 
+class MimetypeMismatch(BaseError):
+    ''' Reported when the mimetype declared for a file in the OPF differs
+    from the mimetype guessed from the name of the file. '''
+
+    level = WARN
+
+    def __init__(self, container, name, opf_mt, ext_mt):
+        '''
+        :param container: The book container, used here only for its opf_name
+        :param name: The name of the file whose mimetype is mismatched
+        :param opf_mt: The mimetype declared for the file in the OPF
+        :param ext_mt: The mimetype guessed from the name of the file
+        '''
+        self.opf_mt, self.ext_mt = opf_mt, ext_mt
+        self.file_name = name
+        BaseError.__init__(self, _('The file %s has a mimetype that does not match its extension') % name, container.opf_name)
+        ext = name.rpartition('.')[-1]
+        self.HELP = _('The file {0} has its mimetype specified as {1} in the OPF file.'
+                      ' The recommended mimetype for files with the extension "{2}" is {3}.'
+                      ' You should change either the file extension or the mimetype in the OPF.').format(
+                          name, opf_mt, ext, ext_mt)
+        self.INDIVIDUAL_FIX = _('Change the mimetype for this file in the OPF to %s') % ext_mt
+
+    def __call__(self, container):
+        ''' Set the media-type of the manifest item(s) for this file to the
+        mimetype guessed from its name. Returns True if the OPF was changed. '''
+        changed = False
+        # Compare the media-type in python instead of interpolating it into
+        # the XPath expression: the value comes from the OPF of the book and
+        # a double quote in it would produce an invalid expression.
+        for item in container.opf_xpath('//opf:manifest/opf:item[@href and @media-type]'):
+            if item.get('media-type') != self.opf_mt:
+                continue
+            name = container.href_to_name(item.get('href'), container.opf_name)
+            if name == self.file_name:
+                changed = True
+                item.set('media-type', self.ext_mt)
+                container.mime_map[name] = self.ext_mt
+        if changed:
+            container.dirty(container.opf_name)
+        return changed
+
+def check_mimetypes(container):
+    ''' Return a list of MimetypeMismatch errors, one for every entry in
+    container.mime_map whose mimetype differs from the mimetype returned by
+    container.guess_type() for its name. '''
+    errors = []
+    a = errors.append
+    for name, mt in container.mime_map.iteritems():
+        gt = container.guess_type(name)
+        if mt != gt:
+            a(MimetypeMismatch(container, name, mt, gt))
+    return errors
 
 def check_links(container):
     links_map = defaultdict(set)
diff --git a/src/calibre/ebooks/oeb/polish/check/main.py b/src/calibre/ebooks/oeb/polish/check/main.py
index 9e61a7ea5e..a1ff319b2a 100644
--- a/src/calibre/ebooks/oeb/polish/check/main.py
+++ b/src/calibre/ebooks/oeb/polish/check/main.py
@@ -14,7 +14,7 @@ from calibre.ebooks.oeb.polish.cover import is_raster_image
 from calibre.ebooks.oeb.polish.check.base import run_checkers
 from calibre.ebooks.oeb.polish.check.parsing import check_xml_parsing, check_css_parsing, fix_style_tag, check_html_size
 from calibre.ebooks.oeb.polish.check.images import check_raster_images
-from calibre.ebooks.oeb.polish.check.links import check_links
+from calibre.ebooks.oeb.polish.check.links import check_links, check_mimetypes
 from calibre.ebooks.oeb.polish.check.fonts import check_fonts
 
 XML_TYPES = frozenset(map(guess_type, ('a.xml', 'a.svg', 'a.opf', 'a.ncx')))
@@ -55,6 +55,7 @@ def run_checks(container):
             if raw:
                 errors.extend(check_css_parsing(name, raw, line_offset=elem.sourceline - 1, is_declaration=True))
 
+    errors += check_mimetypes(container)
     errors += check_links(container)
     errors += check_fonts(container)
 