EPUB Output: Sort the entries in the manifest by spine position/media-type/filename

This commit is contained in:
Kovid Goyal 2019-03-28 11:00:41 +05:30
parent cf1a6aea96
commit 775ffdee21
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -6,11 +6,13 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>' __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, re, logging import os, re, logging, sys, numbers
from collections import defaultdict from collections import defaultdict
from itertools import count from itertools import count
from operator import attrgetter
from lxml import etree, html from lxml import etree, html
from calibre import force_unicode
from calibre.constants import filesystem_encoding, __version__, ispy3 from calibre.constants import filesystem_encoding, __version__, ispy3
from calibre.translations.dynamic import translate from calibre.translations.dynamic import translate
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
@@ -20,8 +22,9 @@ from calibre.ebooks.oeb.parse_utils import (barename, XHTML_NS, RECOVER_PARSER,
namespace, XHTML, parse_html, NotHTML) namespace, XHTML, parse_html, NotHTML)
from calibre.utils.cleantext import clean_xml_chars from calibre.utils.cleantext import clean_xml_chars
from calibre.utils.short_uuid import uuid4 from calibre.utils.short_uuid import uuid4
from polyglot.builtins import iteritems, unicode_type, string_or_bytes, range from polyglot.builtins import iteritems, unicode_type, string_or_bytes, range, itervalues
from polyglot.urllib import unquote, urldefrag, urljoin, urlparse, urlunparse from polyglot.urllib import unquote, urldefrag, urljoin, urlparse, urlunparse
from calibre.utils.icu import numeric_sort_key
XML_NS = 'http://www.w3.org/XML/1998/namespace' XML_NS = 'http://www.w3.org/XML/1998/namespace'
OEB_DOC_NS = 'http://openebook.org/namespaces/oeb-document/1.0/' OEB_DOC_NS = 'http://openebook.org/namespaces/oeb-document/1.0/'
@@ -935,8 +938,6 @@ class Manifest(object):
have a :attr:`spine_position` of `None`. have a :attr:`spine_position` of `None`.
""" """
NUM_RE = re.compile('^(.*)([0-9][0-9.]*)(?=[.]|$)')
def __init__(self, oeb, id, href, media_type, def __init__(self, oeb, id, href, media_type,
fallback=None, loader=str, data=None): fallback=None, loader=str, data=None):
if href: if href:
@@ -1122,24 +1123,18 @@ class Manifest(object):
return serialize(self.data, self.media_type, pretty_print=self.oeb.pretty_print) return serialize(self.data, self.media_type, pretty_print=self.oeb.pretty_print)
def __eq__(self, other): def __eq__(self, other):
return id(self) == id(other) return self is other
def __ne__(self, other): def __ne__(self, other):
return not self.__eq__(other) return self is not other
def __cmp__(self, other): @property
result = cmp(self.spine_position, other.spine_position) def sort_key(self):
if result != 0: href = self.href
return result if isinstance(href, bytes):
smatch = self.NUM_RE.search(self.href) href = force_unicode(href)
sref = smatch.group(1) if smatch else self.href sp = self.spine_position if isinstance(self.spine_position, numbers.Number) else sys.maxsize
snum = float(smatch.group(2)) if smatch else 0.0 return sp, (self.media_type or '').lower(), numeric_sort_key(href), self.id
skey = (sref, snum, self.id)
omatch = self.NUM_RE.search(other.href)
oref = omatch.group(1) if omatch else other.href
onum = float(omatch.group(2)) if omatch else 0.0
okey = (oref, onum, other.id)
return cmp(skey, okey)
def relhref(self, href): def relhref(self, href):
"""Convert the URL provided in :param:`href` from a book-absolute """Convert the URL provided in :param:`href` from a book-absolute
@@ -1269,7 +1264,7 @@ class Manifest(object):
def to_opf2(self, parent=None): def to_opf2(self, parent=None):
elem = element(parent, OPF('manifest')) elem = element(parent, OPF('manifest'))
for item in sorted(self.items, key=lambda x: x.href): for item in sorted(self.items, key=attrgetter('sort_key')):
media_type = item.media_type media_type = item.media_type
if media_type in OEB_DOCS: if media_type in OEB_DOCS:
media_type = XHTML_MIME media_type = XHTML_MIME
@@ -1417,9 +1412,9 @@ class Guide(object):
('notes', __('Notes')), ('notes', __('Notes')),
('preface', __('Preface')), ('preface', __('Preface')),
('text', __('Main text'))] ('text', __('Main text'))]
TYPES = set(t for t, _ in _TYPES_TITLES) # noqa
TITLES = dict(_TYPES_TITLES) TITLES = dict(_TYPES_TITLES)
ORDER = dict((t, i) for i, (t, _) in enumerate(_TYPES_TITLES)) # noqa TYPES = frozenset(TITLES)
ORDER = {t: i for i, (t, _) in enumerate(_TYPES_TITLES)}
def __init__(self, oeb, type, title, href): def __init__(self, oeb, type, title, href):
self.oeb = oeb self.oeb = oeb
@@ -1438,17 +1433,6 @@ class Guide(object):
return 'Reference(type=%r, title=%r, href=%r)' \ return 'Reference(type=%r, title=%r, href=%r)' \
% (self.type, self.title, self.href) % (self.type, self.title, self.href)
@dynamic_property
def _order(self):
def fget(self):
return self.ORDER.get(self.type, self.type)
return property(fget=fget)
def __cmp__(self, other):
if not isinstance(other, Guide.Reference):
return NotImplemented
return cmp(self._order, other._order)
@dynamic_property @dynamic_property
def item(self): def item(self):
doc = """The manifest item associated with this reference.""" doc = """The manifest item associated with this reference."""
@@ -1485,7 +1469,7 @@ class Guide(object):
__iter__ = iterkeys __iter__ = iterkeys
def values(self): def values(self):
return sorted(self.refs.values()) return sorted(itervalues(self.refs), key=lambda ref: ref.ORDER.get(ref.type, 10000))
def items(self): def items(self):
for type, ref in self.refs.items(): for type, ref in self.refs.items():