diff --git a/resources/mime.types b/resources/mime.types index a460b18249..45caa946f5 100644 --- a/resources/mime.types +++ b/resources/mime.types @@ -1378,10 +1378,10 @@ application/x-cbc cbc application/x-koboreader-ebook kobo image/wmf wmf application/ereader pdb -# See http://idpf.org/epub/30/spec/epub30-publications.html#sec-core-media-types -application/vnd.ms-opentype otf -application/font-woff woff -application/x-font-truetype ttf +font/otf otf +font/woff woff +font/woff2 woff2 +font/ttf ttf text/xml plist text/x-markdown md markdown application/x-ibooks+zip ibook ibooks diff --git a/src/calibre/ebooks/conversion/plugins/oeb_output.py b/src/calibre/ebooks/conversion/plugins/oeb_output.py index 774537481f..ed91bfdcfb 100644 --- a/src/calibre/ebooks/conversion/plugins/oeb_output.py +++ b/src/calibre/ebooks/conversion/plugins/oeb_output.py @@ -45,6 +45,7 @@ class OEBOutput(OutputFormatPlugin): self.log.exception('Something went wrong while trying to' ' workaround Pocketbook cover bug, ignoring') self.migrate_lang_code(root) + self.adjust_mime_types(root) raw = etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=True) if key == OPF_MIME: @@ -67,6 +68,15 @@ class OEBOutput(OutputFormatPlugin): f.write(item.bytes_representation) item.unload_data_from_memory(memory=path) + def adjust_mime_types(self, root): + from calibre.ebooks.oeb.polish.utils import adjust_mime_for_epub + for x in root.xpath('//*[local-name() = "manifest"]/*[local-name() = "item"]'): + mt = x.get('media-type') + if mt: + nmt = adjust_mime_for_epub(filename=os.path.basename(x.get('href') or ''), mime=mt) + if nmt != mt: + x.set('media-type', nmt) + def workaround_nook_cover_bug(self, root): # {{{ cov = root.xpath('//*[local-name() = "meta" and @name="cover" and' ' @content != "cover"]') diff --git a/src/calibre/ebooks/oeb/polish/check/fonts.py b/src/calibre/ebooks/oeb/polish/check/fonts.py index 247d667744..7bfb4854be 100644 --- a/src/calibre/ebooks/oeb/polish/check/fonts.py +++ b/src/calibre/ebooks/oeb/polish/check/fonts.py @@ -9,7 +9,7 @@ from css_parser.css import CSSRule from calibre import force_unicode from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES from calibre.ebooks.oeb.polish.check.base import BaseError, WARN -from calibre.ebooks.oeb.polish.container import OEB_FONTS +from calibre.ebooks.oeb.polish.utils import OEB_FONTS from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style from calibre.ebooks.oeb.polish.fonts import change_font_in_declaration from calibre.utils.fonts.utils import get_all_font_names, is_font_embeddable, UnsupportedFont diff --git a/src/calibre/ebooks/oeb/polish/check/links.py b/src/calibre/ebooks/oeb/polish/check/links.py index 5d1e0bc880..a039a56aed 100644 --- a/src/calibre/ebooks/oeb/polish/check/links.py +++ b/src/calibre/ebooks/oeb/polish/check/links.py @@ -10,11 +10,10 @@ from threading import Thread from calibre import browser from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, urlunquote, XHTML_MIME -from calibre.ebooks.oeb.polish.container import OEB_FONTS from calibre.ebooks.oeb.polish.parsing import parse_html5 from calibre.ebooks.oeb.polish.replace import remove_links_to from calibre.ebooks.oeb.polish.cover import get_raster_cover_name -from calibre.ebooks.oeb.polish.utils import guess_type, actual_case_for_name, corrected_case_for_name +from calibre.ebooks.oeb.polish.utils import guess_type, actual_case_for_name, corrected_case_for_name, OEB_FONTS from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, INFO from polyglot.builtins import iteritems, itervalues from polyglot.urllib import urlparse diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py index 1568725479..74e0a26f9c 100644 --- a/src/calibre/ebooks/oeb/polish/container.py +++ b/src/calibre/ebooks/oeb/polish/container.py @@ -41,7 +41,7 @@ from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html from calibre.ebooks.oeb.polish.errors import DRMError, InvalidBook from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak from calibre.ebooks.oeb.polish.utils import ( - CommentFinder, PositionFinder, guess_type, parse_css + CommentFinder, PositionFinder, adjust_mime_for_epub, guess_type, parse_css, OEB_FONTS ) from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.utils.filenames import hardlink_file, nlinks_file, retry_on_fail @@ -53,10 +53,9 @@ from polyglot.builtins import iteritems from polyglot.urllib import urlparse exists, join, relpath = os.path.exists, os.path.join, os.path.relpath - -OEB_FONTS = {guess_type('a.ttf'), guess_type('b.otf'), guess_type('a.woff'), 'application/x-font-ttf', 'application/x-font-otf', 'application/font-sfnt'} OPF_NAMESPACES = {'opf':OPF2_NS, 'dc':DC11_NS} null = object() +OEB_FONTS # for plugin compat class CSSPreProcessor(cssp): @@ -141,20 +140,7 @@ class ContainerBase: # {{{ def guess_type(self, name): ' Return the expected mimetype for the specified file name based on its extension. ' - # epubcheck complains if the mimetype for text documents is set to - # text/html in EPUB 2 books. Sigh. - ans = guess_type(name) - if ans == 'text/html': - ans = 'application/xhtml+xml' - if ans in {'application/x-font-truetype', 'application/vnd.ms-opentype'}: - opfversion = self.opf_version_parsed[:2] - if opfversion > (3, 0): - return 'application/font-sfnt' - if opfversion >= (3, 0): - # bloody epubcheck has recently decided it likes this mimetype - # for ttf files - return 'application/vnd.ms-opentype' - return ans + return adjust_mime_for_epub(filename=name, opf_version=self.opf_version_parsed) def decode(self, data, normalize_to_nfc=True): """ diff --git a/src/calibre/ebooks/oeb/polish/replace.py b/src/calibre/ebooks/oeb/polish/replace.py index f8e60e50df..efc09be273 100644 --- a/src/calibre/ebooks/oeb/polish/replace.py +++ b/src/calibre/ebooks/oeb/polish/replace.py @@ -229,8 +229,7 @@ def replace_file(container, name, path, basename, force_mt=None): def mt_to_category(container, mt): - from calibre.ebooks.oeb.polish.utils import guess_type - from calibre.ebooks.oeb.polish.container import OEB_FONTS + from calibre.ebooks.oeb.polish.utils import guess_type, OEB_FONTS from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES if mt in OEB_DOCS: category = 'text' diff --git a/src/calibre/ebooks/oeb/polish/report.py b/src/calibre/ebooks/oeb/polish/report.py index 31b2e3da35..a70c55c4bd 100644 --- a/src/calibre/ebooks/oeb/polish/report.py +++ b/src/calibre/ebooks/oeb/polish/report.py @@ -10,7 +10,8 @@ from itertools import chain from calibre import prepare_string_for_xml, force_unicode from calibre.ebooks.oeb.base import XPath, xml2text -from calibre.ebooks.oeb.polish.container import OEB_DOCS, OEB_STYLES, OEB_FONTS +from calibre.ebooks.oeb.polish.container import OEB_DOCS, OEB_STYLES +from calibre.ebooks.oeb.polish.utils import OEB_FONTS from calibre.ebooks.oeb.polish.spell import get_all_words, count_all_chars from calibre.utils.icu import numeric_sort_key, safe_chr from calibre.utils.imghdr import identify diff --git a/src/calibre/ebooks/oeb/polish/subset.py b/src/calibre/ebooks/oeb/polish/subset.py index 64bca7ddd1..1d59cc3ef7 100644 --- a/src/calibre/ebooks/oeb/polish/subset.py +++ b/src/calibre/ebooks/oeb/polish/subset.py @@ -9,8 +9,7 @@ import os, sys from calibre import prints, as_unicode from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPath, css_text -from calibre.ebooks.oeb.polish.container import OEB_FONTS -from calibre.ebooks.oeb.polish.utils import guess_type +from calibre.ebooks.oeb.polish.utils import guess_type, OEB_FONTS from calibre.utils.fonts.sfnt.subset import subset from calibre.utils.fonts.sfnt.errors import UnsupportedFont from calibre.utils.fonts.utils import get_font_names @@ -34,8 +33,9 @@ def remove_font_face_rules(container, sheet, remove_names, base): def iter_subsettable_fonts(container): + woff_font_types = guess_type('a.woff'), guess_type('a.woff2') for name, mt in iteritems(container.mime_map): - if (mt in OEB_FONTS or name.rpartition('.')[-1].lower() in {'otf', 'ttf'}) and mt != guess_type('a.woff'): + if (mt in OEB_FONTS or name.rpartition('.')[-1].lower() in {'otf', 'ttf'}) and mt not in woff_font_types: yield name, mt diff --git a/src/calibre/ebooks/oeb/polish/upgrade.py b/src/calibre/ebooks/oeb/polish/upgrade.py index f241a01ce2..4658beee04 100644 --- a/src/calibre/ebooks/oeb/polish/upgrade.py +++ b/src/calibre/ebooks/oeb/polish/upgrade.py @@ -7,7 +7,7 @@ import sys from calibre.ebooks.metadata.opf_2_to_3 import upgrade_metadata from calibre.ebooks.oeb.base import EPUB_NS, OEB_DOCS, xpath from calibre.ebooks.oeb.parse_utils import ensure_namespace_prefixes -from calibre.ebooks.oeb.polish.container import OEB_FONTS +from calibre.ebooks.oeb.polish.utils import OEB_FONTS from calibre.ebooks.oeb.polish.opf import get_book_language from calibre.ebooks.oeb.polish.toc import ( commit_nav_toc, find_existing_ncx_toc, get_landmarks, get_toc diff --git a/src/calibre/ebooks/oeb/polish/utils.py b/src/calibre/ebooks/oeb/polish/utils.py index 25c6da7497..0ec5ffc25b 100644 --- a/src/calibre/ebooks/oeb/polish/utils.py +++ b/src/calibre/ebooks/oeb/polish/utils.py @@ -14,6 +14,56 @@ def guess_type(x): return _guess_type(x)[0] or 'application/octet-stream' +# All font mimetypes seen in e-books +OEB_FONTS = frozenset({ + 'font/otf', + 'font/woff', + 'font/woff2', + 'font/ttf', + 'application/x-font-ttf', + 'application/x-font-otf', + 'application/font-sfnt', + 'application/vnd.ms-opentype', + 'application/x-font-truetype', +}) + + +def adjust_mime_for_epub(filename='', mime='', opf_version=(2, 0)): + mime = mime or guess_type(filename) + if mime == 'text/html': + # epubcheck complains if the mimetype for text documents is set to text/html in EPUB 2 books. Sigh. + return 'application/xhtml+xml' + if mime not in OEB_FONTS: + return mime + if 'ttf' in mime or 'truetype' in mime: + mime = 'font/ttf' + elif 'otf' in mime or 'opentype' in mime: + mime = 'font/otf' + elif mime == 'application/font-sfnt': + mime = 'font/otf' if filename.lower().endswith('.otf') else 'font/ttf' + elif 'woff2' in mime: + mime = 'font/woff2' + elif 'woff' in mime: + mime = 'font/woff' + opf_version = tuple(opf_version[:2]) + if opf_version == (3, 0): + mime = { + 'font/ttf': 'application/vnd.ms-opentype', # this is needed by the execrable epubchek + 'font/otf': 'application/vnd.ms-opentype', + 'font/woff': 'application/font-woff'}.get(mime, mime) + elif opf_version == (3, 1): + mime = { + 'font/ttf': 'application/font-sfnt', + 'font/otf': 'application/font-sfnt', + 'font/woff': 'application/font-woff'}.get(mime, mime) + elif opf_version < (3, 0): + mime = { + 'font/ttf': 'application/x-font-truetype', + 'font/otf': 'application/vnd.ms-opentype', + 'font/woff': 'application/font-woff'}.get(mime, mime) + return mime + + def setup_css_parser_serialization(tab_width=2): import css_parser prefs = css_parser.ser.prefs diff --git a/src/calibre/gui2/tweak_book/completion/basic.py b/src/calibre/gui2/tweak_book/completion/basic.py index 6469fa8193..3962951bb6 100644 --- a/src/calibre/gui2/tweak_book/completion/basic.py +++ b/src/calibre/gui2/tweak_book/completion/basic.py @@ -10,7 +10,8 @@ from collections import namedtuple, OrderedDict from qt.core import QObject, pyqtSignal, Qt from calibre import prepare_string_for_xml -from calibre.ebooks.oeb.polish.container import OEB_STYLES, OEB_FONTS, name_to_href +from calibre.ebooks.oeb.polish.container import OEB_STYLES, name_to_href +from calibre.ebooks.oeb.polish.utils import OEB_FONTS from calibre.ebooks.oeb.polish.parsing import parse from calibre.ebooks.oeb.polish.report import description_for_anchor from calibre.gui2 import is_gui_thread diff --git a/src/calibre/gui2/tweak_book/file_list.py b/src/calibre/gui2/tweak_book/file_list.py index ad053c151f..b2da9aef81 100644 --- a/src/calibre/gui2/tweak_book/file_list.py +++ b/src/calibre/gui2/tweak_book/file_list.py @@ -19,7 +19,7 @@ from qt.core import ( from calibre import human_readable, sanitize_file_name from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES -from calibre.ebooks.oeb.polish.container import OEB_FONTS, guess_type +from calibre.ebooks.oeb.polish.utils import OEB_FONTS, guess_type from calibre.ebooks.oeb.polish.cover import ( get_cover_page_name, get_raster_cover_name, is_raster_image )