Use the IANA registered mimetypes for fonts in the pipeline

Also use the IANA types for EPUB versions > 3.1
Fixes #1659 (Update fonts MIME following IANA recommendation)
This commit is contained in:
Kovid Goyal 2022-06-16 11:08:57 +05:30
parent 0cecc77a22
commit c0a2656cb2
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
12 changed files with 79 additions and 33 deletions

View File

@ -1378,10 +1378,10 @@ application/x-cbc cbc
application/x-koboreader-ebook kobo application/x-koboreader-ebook kobo
image/wmf wmf image/wmf wmf
application/ereader pdb application/ereader pdb
# See http://idpf.org/epub/30/spec/epub30-publications.html#sec-core-media-types font/otf otf
application/vnd.ms-opentype otf font/woff woff
application/font-woff woff font/woff2 woff2
application/x-font-truetype ttf font/ttf ttf
text/xml plist text/xml plist
text/x-markdown md markdown text/x-markdown md markdown
application/x-ibooks+zip ibook ibooks application/x-ibooks+zip ibook ibooks

View File

@ -45,6 +45,7 @@ class OEBOutput(OutputFormatPlugin):
self.log.exception('Something went wrong while trying to' self.log.exception('Something went wrong while trying to'
' workaround Pocketbook cover bug, ignoring') ' workaround Pocketbook cover bug, ignoring')
self.migrate_lang_code(root) self.migrate_lang_code(root)
self.adjust_mime_types(root)
raw = etree.tostring(root, pretty_print=True, raw = etree.tostring(root, pretty_print=True,
encoding='utf-8', xml_declaration=True) encoding='utf-8', xml_declaration=True)
if key == OPF_MIME: if key == OPF_MIME:
@ -67,6 +68,15 @@ class OEBOutput(OutputFormatPlugin):
f.write(item.bytes_representation) f.write(item.bytes_representation)
item.unload_data_from_memory(memory=path) item.unload_data_from_memory(memory=path)
def adjust_mime_types(self, root):
    '''
    Pass the media-type of every manifest item in the OPF tree ``root``
    through adjust_mime_for_epub() and store the result back on the item
    when it differs. Items with no (or an empty) media-type are skipped.
    '''
    from calibre.ebooks.oeb.polish.utils import adjust_mime_for_epub
    manifest_items = root.xpath('//*[local-name() = "manifest"]/*[local-name() = "item"]')
    for item in manifest_items:
        declared = item.get('media-type')
        if not declared:
            continue
        # Only the final path component of the href is handed over as the file name
        fname = os.path.basename(item.get('href') or '')
        wanted = adjust_mime_for_epub(filename=fname, mime=declared)
        if wanted != declared:
            item.set('media-type', wanted)
def workaround_nook_cover_bug(self, root): # {{{ def workaround_nook_cover_bug(self, root): # {{{
cov = root.xpath('//*[local-name() = "meta" and @name="cover" and' cov = root.xpath('//*[local-name() = "meta" and @name="cover" and'
' @content != "cover"]') ' @content != "cover"]')

View File

@ -9,7 +9,7 @@ from css_parser.css import CSSRule
from calibre import force_unicode from calibre import force_unicode
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN from calibre.ebooks.oeb.polish.check.base import BaseError, WARN
from calibre.ebooks.oeb.polish.container import OEB_FONTS from calibre.ebooks.oeb.polish.utils import OEB_FONTS
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style
from calibre.ebooks.oeb.polish.fonts import change_font_in_declaration from calibre.ebooks.oeb.polish.fonts import change_font_in_declaration
from calibre.utils.fonts.utils import get_all_font_names, is_font_embeddable, UnsupportedFont from calibre.utils.fonts.utils import get_all_font_names, is_font_embeddable, UnsupportedFont

View File

@ -10,11 +10,10 @@ from threading import Thread
from calibre import browser from calibre import browser
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, urlunquote, XHTML_MIME from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, urlunquote, XHTML_MIME
from calibre.ebooks.oeb.polish.container import OEB_FONTS
from calibre.ebooks.oeb.polish.parsing import parse_html5 from calibre.ebooks.oeb.polish.parsing import parse_html5
from calibre.ebooks.oeb.polish.replace import remove_links_to from calibre.ebooks.oeb.polish.replace import remove_links_to
from calibre.ebooks.oeb.polish.cover import get_raster_cover_name from calibre.ebooks.oeb.polish.cover import get_raster_cover_name
from calibre.ebooks.oeb.polish.utils import guess_type, actual_case_for_name, corrected_case_for_name from calibre.ebooks.oeb.polish.utils import guess_type, actual_case_for_name, corrected_case_for_name, OEB_FONTS
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, INFO from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, INFO
from polyglot.builtins import iteritems, itervalues from polyglot.builtins import iteritems, itervalues
from polyglot.urllib import urlparse from polyglot.urllib import urlparse

View File

@ -41,7 +41,7 @@ from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html
from calibre.ebooks.oeb.polish.errors import DRMError, InvalidBook from calibre.ebooks.oeb.polish.errors import DRMError, InvalidBook
from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
from calibre.ebooks.oeb.polish.utils import ( from calibre.ebooks.oeb.polish.utils import (
CommentFinder, PositionFinder, guess_type, parse_css CommentFinder, PositionFinder, adjust_mime_for_epub, guess_type, parse_css, OEB_FONTS
) )
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.utils.filenames import hardlink_file, nlinks_file, retry_on_fail from calibre.utils.filenames import hardlink_file, nlinks_file, retry_on_fail
@ -53,10 +53,9 @@ from polyglot.builtins import iteritems
from polyglot.urllib import urlparse from polyglot.urllib import urlparse
exists, join, relpath = os.path.exists, os.path.join, os.path.relpath exists, join, relpath = os.path.exists, os.path.join, os.path.relpath
OEB_FONTS = {guess_type('a.ttf'), guess_type('b.otf'), guess_type('a.woff'), 'application/x-font-ttf', 'application/x-font-otf', 'application/font-sfnt'}
OPF_NAMESPACES = {'opf':OPF2_NS, 'dc':DC11_NS} OPF_NAMESPACES = {'opf':OPF2_NS, 'dc':DC11_NS}
null = object() null = object()
OEB_FONTS # for plugin compat
class CSSPreProcessor(cssp): class CSSPreProcessor(cssp):
@ -141,20 +140,7 @@ class ContainerBase: # {{{
def guess_type(self, name): def guess_type(self, name):
' Return the expected mimetype for the specified file name based on its extension. ' ' Return the expected mimetype for the specified file name based on its extension. '
# epubcheck complains if the mimetype for text documents is set to return adjust_mime_for_epub(filename=name, opf_version=self.opf_version_parsed)
# text/html in EPUB 2 books. Sigh.
ans = guess_type(name)
if ans == 'text/html':
ans = 'application/xhtml+xml'
if ans in {'application/x-font-truetype', 'application/vnd.ms-opentype'}:
opfversion = self.opf_version_parsed[:2]
if opfversion > (3, 0):
return 'application/font-sfnt'
if opfversion >= (3, 0):
# bloody epubcheck has recently decided it likes this mimetype
# for ttf files
return 'application/vnd.ms-opentype'
return ans
def decode(self, data, normalize_to_nfc=True): def decode(self, data, normalize_to_nfc=True):
""" """

View File

@ -229,8 +229,7 @@ def replace_file(container, name, path, basename, force_mt=None):
def mt_to_category(container, mt): def mt_to_category(container, mt):
from calibre.ebooks.oeb.polish.utils import guess_type from calibre.ebooks.oeb.polish.utils import guess_type, OEB_FONTS
from calibre.ebooks.oeb.polish.container import OEB_FONTS
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
if mt in OEB_DOCS: if mt in OEB_DOCS:
category = 'text' category = 'text'

View File

@ -10,7 +10,8 @@ from itertools import chain
from calibre import prepare_string_for_xml, force_unicode from calibre import prepare_string_for_xml, force_unicode
from calibre.ebooks.oeb.base import XPath, xml2text from calibre.ebooks.oeb.base import XPath, xml2text
from calibre.ebooks.oeb.polish.container import OEB_DOCS, OEB_STYLES, OEB_FONTS from calibre.ebooks.oeb.polish.container import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.utils import OEB_FONTS
from calibre.ebooks.oeb.polish.spell import get_all_words, count_all_chars from calibre.ebooks.oeb.polish.spell import get_all_words, count_all_chars
from calibre.utils.icu import numeric_sort_key, safe_chr from calibre.utils.icu import numeric_sort_key, safe_chr
from calibre.utils.imghdr import identify from calibre.utils.imghdr import identify

View File

@ -9,8 +9,7 @@ import os, sys
from calibre import prints, as_unicode from calibre import prints, as_unicode
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPath, css_text from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPath, css_text
from calibre.ebooks.oeb.polish.container import OEB_FONTS from calibre.ebooks.oeb.polish.utils import guess_type, OEB_FONTS
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.utils.fonts.sfnt.subset import subset from calibre.utils.fonts.sfnt.subset import subset
from calibre.utils.fonts.sfnt.errors import UnsupportedFont from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from calibre.utils.fonts.utils import get_font_names from calibre.utils.fonts.utils import get_font_names
@ -34,8 +33,9 @@ def remove_font_face_rules(container, sheet, remove_names, base):
def iter_subsettable_fonts(container): def iter_subsettable_fonts(container):
woff_font_types = guess_type('a.woff'), guess_type('a.woff2')
for name, mt in iteritems(container.mime_map): for name, mt in iteritems(container.mime_map):
if (mt in OEB_FONTS or name.rpartition('.')[-1].lower() in {'otf', 'ttf'}) and mt != guess_type('a.woff'): if (mt in OEB_FONTS or name.rpartition('.')[-1].lower() in {'otf', 'ttf'}) and mt not in woff_font_types:
yield name, mt yield name, mt

View File

@ -7,7 +7,7 @@ import sys
from calibre.ebooks.metadata.opf_2_to_3 import upgrade_metadata from calibre.ebooks.metadata.opf_2_to_3 import upgrade_metadata
from calibre.ebooks.oeb.base import EPUB_NS, OEB_DOCS, xpath from calibre.ebooks.oeb.base import EPUB_NS, OEB_DOCS, xpath
from calibre.ebooks.oeb.parse_utils import ensure_namespace_prefixes from calibre.ebooks.oeb.parse_utils import ensure_namespace_prefixes
from calibre.ebooks.oeb.polish.container import OEB_FONTS from calibre.ebooks.oeb.polish.utils import OEB_FONTS
from calibre.ebooks.oeb.polish.opf import get_book_language from calibre.ebooks.oeb.polish.opf import get_book_language
from calibre.ebooks.oeb.polish.toc import ( from calibre.ebooks.oeb.polish.toc import (
commit_nav_toc, find_existing_ncx_toc, get_landmarks, get_toc commit_nav_toc, find_existing_ncx_toc, get_landmarks, get_toc

View File

@ -14,6 +14,56 @@ def guess_type(x):
return _guess_type(x)[0] or 'application/octet-stream' return _guess_type(x)[0] or 'application/octet-stream'
# All font mimetypes seen in e-books
OEB_FONTS = frozenset({
    'font/otf',
    'font/woff',
    'font/woff2',
    'font/ttf',
    'application/x-font-ttf',
    'application/x-font-otf',
    'application/font-sfnt',
    'application/vnd.ms-opentype',
    'application/x-font-truetype',
})


def adjust_mime_for_epub(filename='', mime='', opf_version=(2, 0)):
    '''
    Return the mimetype to declare for a file in a book of the given OPF version.

    :param filename: Name of the file. Used to guess the mimetype when ``mime``
        is empty and to tell TrueType from OpenType when ``mime`` is one of the
        types used for both. ``None`` is treated as an empty name.
    :param mime: The currently declared mimetype, if any.
    :param opf_version: Sequence whose first two items are the (major, minor)
        OPF version.
    :return: ``application/xhtml+xml`` for ``text/html``; for mimetypes in
        OEB_FONTS the font type appropriate for ``opf_version``; any other
        mimetype unchanged.
    '''
    filename = filename or ''
    mime = mime or guess_type(filename)
    if mime == 'text/html':
        # epubcheck complains if the mimetype for text documents is set to text/html in EPUB 2 books. Sigh.
        return 'application/xhtml+xml'
    if mime not in OEB_FONTS:
        return mime

    # Step 1: normalize every known font alias to its font/* form.
    lname = filename.lower()
    if mime == 'application/font-sfnt':
        mime = 'font/otf' if lname.endswith('.otf') else 'font/ttf'
    elif mime == 'application/vnd.ms-opentype':
        # The OPF 3.0 table below assigns this type to TrueType files as well,
        # so the name alone does not imply OpenType. Trust a .ttf extension,
        # otherwise fall back to OpenType.
        mime = 'font/ttf' if lname.endswith('.ttf') else 'font/otf'
    elif 'ttf' in mime or 'truetype' in mime:
        mime = 'font/ttf'
    elif 'otf' in mime or 'opentype' in mime:
        mime = 'font/otf'
    elif 'woff2' in mime:
        mime = 'font/woff2'
    elif 'woff' in mime:
        mime = 'font/woff'

    # Step 2: map the font/* form to the legacy type used for older OPF
    # versions. Versions above 3.1 keep the font/* types as they are.
    opf_version = tuple(opf_version[:2])
    if opf_version == (3, 0):
        mime = {
            'font/ttf': 'application/vnd.ms-opentype',  # this is needed by the execrable epubcheck
            'font/otf': 'application/vnd.ms-opentype',
            'font/woff': 'application/font-woff'}.get(mime, mime)
    elif opf_version == (3, 1):
        mime = {
            'font/ttf': 'application/font-sfnt',
            'font/otf': 'application/font-sfnt',
            'font/woff': 'application/font-woff'}.get(mime, mime)
    elif opf_version < (3, 0):
        mime = {
            'font/ttf': 'application/x-font-truetype',
            'font/otf': 'application/vnd.ms-opentype',
            'font/woff': 'application/font-woff'}.get(mime, mime)
    return mime
def setup_css_parser_serialization(tab_width=2): def setup_css_parser_serialization(tab_width=2):
import css_parser import css_parser
prefs = css_parser.ser.prefs prefs = css_parser.ser.prefs

View File

@ -10,7 +10,8 @@ from collections import namedtuple, OrderedDict
from qt.core import QObject, pyqtSignal, Qt from qt.core import QObject, pyqtSignal, Qt
from calibre import prepare_string_for_xml from calibre import prepare_string_for_xml
from calibre.ebooks.oeb.polish.container import OEB_STYLES, OEB_FONTS, name_to_href from calibre.ebooks.oeb.polish.container import OEB_STYLES, name_to_href
from calibre.ebooks.oeb.polish.utils import OEB_FONTS
from calibre.ebooks.oeb.polish.parsing import parse from calibre.ebooks.oeb.polish.parsing import parse
from calibre.ebooks.oeb.polish.report import description_for_anchor from calibre.ebooks.oeb.polish.report import description_for_anchor
from calibre.gui2 import is_gui_thread from calibre.gui2 import is_gui_thread

View File

@ -19,7 +19,7 @@ from qt.core import (
from calibre import human_readable, sanitize_file_name from calibre import human_readable, sanitize_file_name
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.polish.container import OEB_FONTS, guess_type from calibre.ebooks.oeb.polish.utils import OEB_FONTS, guess_type
from calibre.ebooks.oeb.polish.cover import ( from calibre.ebooks.oeb.polish.cover import (
get_cover_page_name, get_raster_cover_name, is_raster_image get_cover_page_name, get_raster_cover_name, is_raster_image
) )