Use the IANA registered mimetypes for fonts in the pipeline

Also use the IANA types for EPUB versions > 3.1
Fixes #1659 (Update fonts MIME following IANA recommendation)
This commit is contained in:
Kovid Goyal 2022-06-16 11:08:57 +05:30
parent 0cecc77a22
commit c0a2656cb2
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
12 changed files with 79 additions and 33 deletions

View File

@ -1378,10 +1378,10 @@ application/x-cbc cbc
application/x-koboreader-ebook kobo
image/wmf wmf
application/ereader pdb
# See http://idpf.org/epub/30/spec/epub30-publications.html#sec-core-media-types
application/vnd.ms-opentype otf
application/font-woff woff
application/x-font-truetype ttf
font/otf otf
font/woff woff
font/woff2 woff2
font/ttf ttf
text/xml plist
text/x-markdown md markdown
application/x-ibooks+zip ibook ibooks

View File

@ -45,6 +45,7 @@ class OEBOutput(OutputFormatPlugin):
self.log.exception('Something went wrong while trying to'
' workaround Pocketbook cover bug, ignoring')
self.migrate_lang_code(root)
self.adjust_mime_types(root)
raw = etree.tostring(root, pretty_print=True,
encoding='utf-8', xml_declaration=True)
if key == OPF_MIME:
@ -67,6 +68,15 @@ class OEBOutput(OutputFormatPlugin):
f.write(item.bytes_representation)
item.unload_data_from_memory(memory=path)
def adjust_mime_types(self, root):
    '''
    Rewrite the media-type attribute of manifest items in the OPF tree.

    Every ``item`` element found under a ``manifest`` element of *root*
    (matched by local name, so any namespace is accepted) that carries a
    non-empty ``media-type`` is passed through adjust_mime_for_epub(),
    together with the base name of its ``href``. The attribute is only
    written back when the returned value differs from the current one.
    '''
    from calibre.ebooks.oeb.polish.utils import adjust_mime_for_epub
    manifest_items = root.xpath('//*[local-name() = "manifest"]/*[local-name() = "item"]')
    for item in manifest_items:
        current = item.get('media-type')
        if not current:
            # Nothing to adjust when the item declares no media type
            continue
        href = item.get('href') or ''
        wanted = adjust_mime_for_epub(filename=os.path.basename(href), mime=current)
        if wanted != current:
            item.set('media-type', wanted)
def workaround_nook_cover_bug(self, root): # {{{
cov = root.xpath('//*[local-name() = "meta" and @name="cover" and'
' @content != "cover"]')

View File

@ -9,7 +9,7 @@ from css_parser.css import CSSRule
from calibre import force_unicode
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN
from calibre.ebooks.oeb.polish.container import OEB_FONTS
from calibre.ebooks.oeb.polish.utils import OEB_FONTS
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style
from calibre.ebooks.oeb.polish.fonts import change_font_in_declaration
from calibre.utils.fonts.utils import get_all_font_names, is_font_embeddable, UnsupportedFont

View File

@ -10,11 +10,10 @@ from threading import Thread
from calibre import browser
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, urlunquote, XHTML_MIME
from calibre.ebooks.oeb.polish.container import OEB_FONTS
from calibre.ebooks.oeb.polish.parsing import parse_html5
from calibre.ebooks.oeb.polish.replace import remove_links_to
from calibre.ebooks.oeb.polish.cover import get_raster_cover_name
from calibre.ebooks.oeb.polish.utils import guess_type, actual_case_for_name, corrected_case_for_name
from calibre.ebooks.oeb.polish.utils import guess_type, actual_case_for_name, corrected_case_for_name, OEB_FONTS
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, INFO
from polyglot.builtins import iteritems, itervalues
from polyglot.urllib import urlparse

View File

@ -41,7 +41,7 @@ from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html
from calibre.ebooks.oeb.polish.errors import DRMError, InvalidBook
from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
from calibre.ebooks.oeb.polish.utils import (
CommentFinder, PositionFinder, guess_type, parse_css
CommentFinder, PositionFinder, adjust_mime_for_epub, guess_type, parse_css, OEB_FONTS
)
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.utils.filenames import hardlink_file, nlinks_file, retry_on_fail
@ -53,10 +53,9 @@ from polyglot.builtins import iteritems
from polyglot.urllib import urlparse
exists, join, relpath = os.path.exists, os.path.join, os.path.relpath
OEB_FONTS = {guess_type('a.ttf'), guess_type('b.otf'), guess_type('a.woff'), 'application/x-font-ttf', 'application/x-font-otf', 'application/font-sfnt'}
OPF_NAMESPACES = {'opf':OPF2_NS, 'dc':DC11_NS}
null = object()
OEB_FONTS # for plugin compat
class CSSPreProcessor(cssp):
@ -141,20 +140,7 @@ class ContainerBase: # {{{
def guess_type(self, name):
    ' Return the expected mimetype for the specified file name based on its extension. '
    # All EPUB specific adjustments (text/html -> application/xhtml+xml and
    # the font mimetype to use for this book's OPF version) are delegated to
    # adjust_mime_for_epub(), so that there is a single implementation of
    # these rules.
    return adjust_mime_for_epub(filename=name, opf_version=self.opf_version_parsed)
def decode(self, data, normalize_to_nfc=True):
"""

View File

@ -229,8 +229,7 @@ def replace_file(container, name, path, basename, force_mt=None):
def mt_to_category(container, mt):
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.oeb.polish.container import OEB_FONTS
from calibre.ebooks.oeb.polish.utils import guess_type, OEB_FONTS
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
if mt in OEB_DOCS:
category = 'text'

View File

@ -10,7 +10,8 @@ from itertools import chain
from calibre import prepare_string_for_xml, force_unicode
from calibre.ebooks.oeb.base import XPath, xml2text
from calibre.ebooks.oeb.polish.container import OEB_DOCS, OEB_STYLES, OEB_FONTS
from calibre.ebooks.oeb.polish.container import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.utils import OEB_FONTS
from calibre.ebooks.oeb.polish.spell import get_all_words, count_all_chars
from calibre.utils.icu import numeric_sort_key, safe_chr
from calibre.utils.imghdr import identify

View File

@ -9,8 +9,7 @@ import os, sys
from calibre import prints, as_unicode
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPath, css_text
from calibre.ebooks.oeb.polish.container import OEB_FONTS
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.oeb.polish.utils import guess_type, OEB_FONTS
from calibre.utils.fonts.sfnt.subset import subset
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from calibre.utils.fonts.utils import get_font_names
@ -34,8 +33,9 @@ def remove_font_face_rules(container, sheet, remove_names, base):
def iter_subsettable_fonts(container):
    '''
    Yield ``(name, mimetype)`` for the font files in *container* that are
    candidates for subsetting.

    A file is a candidate when its mimetype is in OEB_FONTS or its name ends
    in ``.otf``/``.ttf`` (case-insensitive). Files whose mimetype is the one
    guess_type() reports for ``.woff`` or ``.woff2`` are never yielded.
    '''
    # Computed once, outside the loop, as it does not depend on the item
    woff_font_types = guess_type('a.woff'), guess_type('a.woff2')
    for name, mt in iteritems(container.mime_map):
        if mt in woff_font_types:
            continue
        if mt in OEB_FONTS or name.rpartition('.')[-1].lower() in {'otf', 'ttf'}:
            yield name, mt

View File

@ -7,7 +7,7 @@ import sys
from calibre.ebooks.metadata.opf_2_to_3 import upgrade_metadata
from calibre.ebooks.oeb.base import EPUB_NS, OEB_DOCS, xpath
from calibre.ebooks.oeb.parse_utils import ensure_namespace_prefixes
from calibre.ebooks.oeb.polish.container import OEB_FONTS
from calibre.ebooks.oeb.polish.utils import OEB_FONTS
from calibre.ebooks.oeb.polish.opf import get_book_language
from calibre.ebooks.oeb.polish.toc import (
commit_nav_toc, find_existing_ncx_toc, get_landmarks, get_toc

View File

@ -14,6 +14,56 @@ def guess_type(x):
return _guess_type(x)[0] or 'application/octet-stream'
# All font mimetypes seen in e-books
OEB_FONTS = frozenset({
    'font/otf',
    'font/woff',
    'font/woff2',
    'font/ttf',
    'application/x-font-ttf',
    'application/x-font-otf',
    'application/font-sfnt',
    'application/vnd.ms-opentype',
    'application/x-font-truetype',
})

# WOFF mimetypes that predate the IANA font/* registrations. They are kept out
# of OEB_FONTS so that membership tests against that set are unaffected, but
# adjust_mime_for_epub() still normalizes them.
_LEGACY_WOFF_MIMES = frozenset({
    'application/font-woff',
    'application/x-font-woff',
})


def adjust_mime_for_epub(filename='', mime='', opf_version=(2, 0)):
    '''
    Return the mimetype to declare for a file in an EPUB of the given version.

    :param filename: Name of the file. Used to guess the mimetype when *mime*
        is empty and to tell OTF from TTF for ``application/font-sfnt``.
    :param mime: The currently declared mimetype, if any.
    :param opf_version: Sequence whose first two items are the (major, minor)
        OPF version.

    ``text/html`` becomes ``application/xhtml+xml``. Font mimetypes are first
    normalized to their IANA ``font/*`` form and then mapped to the type used
    for the requested version; versions newer than 3.1 keep the IANA type.
    Any other mimetype is returned unchanged.
    '''
    mime = mime or guess_type(filename)
    if mime == 'text/html':
        # epubcheck complains if the mimetype for text documents is set to text/html in EPUB 2 books. Sigh.
        return 'application/xhtml+xml'
    if mime not in OEB_FONTS and mime not in _LEGACY_WOFF_MIMES:
        return mime

    # Step 1: normalize whatever was declared to the IANA registered type
    if 'ttf' in mime or 'truetype' in mime:
        mime = 'font/ttf'
    elif 'otf' in mime or 'opentype' in mime:
        mime = 'font/otf'
    elif mime == 'application/font-sfnt':
        # This type covers both flavors, the file extension decides
        mime = 'font/otf' if filename.lower().endswith('.otf') else 'font/ttf'
    elif 'woff2' in mime:
        mime = 'font/woff2'
    elif 'woff' in mime:
        mime = 'font/woff'

    # Step 2: map the IANA type to the one used for this OPF version
    opf_version = tuple(opf_version[:2])
    if opf_version == (3, 0):
        overrides = {
            'font/ttf': 'application/vnd.ms-opentype',  # this is needed by the execrable epubcheck
            'font/otf': 'application/vnd.ms-opentype',
            'font/woff': 'application/font-woff'}
    elif opf_version == (3, 1):
        overrides = {
            'font/ttf': 'application/font-sfnt',
            'font/otf': 'application/font-sfnt',
            'font/woff': 'application/font-woff'}
    elif opf_version < (3, 0):
        overrides = {
            'font/ttf': 'application/x-font-truetype',
            'font/otf': 'application/vnd.ms-opentype',
            'font/woff': 'application/font-woff'}
    else:
        # Newer than 3.1: use the IANA types as is
        overrides = {}
    return overrides.get(mime, mime)
def setup_css_parser_serialization(tab_width=2):
import css_parser
prefs = css_parser.ser.prefs

View File

@ -10,7 +10,8 @@ from collections import namedtuple, OrderedDict
from qt.core import QObject, pyqtSignal, Qt
from calibre import prepare_string_for_xml
from calibre.ebooks.oeb.polish.container import OEB_STYLES, OEB_FONTS, name_to_href
from calibre.ebooks.oeb.polish.container import OEB_STYLES, name_to_href
from calibre.ebooks.oeb.polish.utils import OEB_FONTS
from calibre.ebooks.oeb.polish.parsing import parse
from calibre.ebooks.oeb.polish.report import description_for_anchor
from calibre.gui2 import is_gui_thread

View File

@ -19,7 +19,7 @@ from qt.core import (
from calibre import human_readable, sanitize_file_name
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.container import OEB_FONTS, guess_type
from calibre.ebooks.oeb.polish.utils import OEB_FONTS, guess_type
from calibre.ebooks.oeb.polish.cover import (
get_cover_page_name, get_raster_cover_name, is_raster_image
)