Factor out code to make a unique name

This commit is contained in:
Kovid Goyal 2017-02-21 10:52:02 +05:30
parent f1cfe3cb29
commit de114b91f0
2 changed files with 52 additions and 40 deletions

View File

@ -1,50 +1,63 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import, # License: GPLv3 Copyright: 2013, Kovid Goyal <kovid at kovidgoyal.net>
print_function) from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3' import errno
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' import hashlib
__docformat__ = 'restructuredtext en' import logging
import os
import os, logging, sys, hashlib, uuid, re, shutil, unicodedata, errno, time import re
import shutil
import sys
import time
import unicodedata
import uuid
from collections import defaultdict from collections import defaultdict
from io import BytesIO
from urlparse import urlparse
from future_builtins import zip from future_builtins import zip
from io import BytesIO
from itertools import count
from urlparse import urlparse
from cssutils import getUrls, replaceUrls
from lxml import etree from lxml import etree
from cssutils import replaceUrls, getUrls
from calibre import CurrentDir from calibre import CurrentDir
from calibre.constants import iswindows from calibre.constants import iswindows
from calibre.customize.ui import (plugin_for_input_format, plugin_for_output_format) from calibre.customize.ui import plugin_for_input_format, plugin_for_output_format
from calibre.ebooks import escape_xpath_attr from calibre.ebooks import escape_xpath_attr
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.conversion.plugins.epub_input import ( from calibre.ebooks.conversion.plugins.epub_input import (
ADOBE_OBFUSCATION, IDPF_OBFUSCATION, decrypt_font_data) ADOBE_OBFUSCATION, IDPF_OBFUSCATION, decrypt_font_data
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor, CSSPreProcessor as cssp )
from calibre.ebooks.metadata.opf3 import read_prefixes, items_with_property, ensure_prefix, CALIBRE_PREFIX from calibre.ebooks.conversion.preprocess import (
CSSPreProcessor as cssp, HTMLPreProcessor
)
from calibre.ebooks.metadata.opf3 import (
CALIBRE_PREFIX, ensure_prefix, items_with_property, read_prefixes
)
from calibre.ebooks.metadata.utils import parse_opf_version from calibre.ebooks.metadata.utils import parse_opf_version
from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi import MobiError
from calibre.ebooks.mobi.reader.headers import MetadataHeader from calibre.ebooks.mobi.reader.headers import MetadataHeader
from calibre.ebooks.mobi.tweak import set_cover from calibre.ebooks.mobi.tweak import set_cover
from calibre.ebooks.oeb.base import ( from calibre.ebooks.oeb.base import (
serialize, OEB_DOCS, OEB_STYLES, OPF2_NS, DC11_NS, OPF, Manifest, DC11_NS, OEB_DOCS, OEB_STYLES, OPF, OPF2_NS, Manifest, itercsslinks, iterlinks,
rewrite_links, iterlinks, itercsslinks, urlquote, urlunquote) rewrite_links, serialize, urlquote, urlunquote
from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError )
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER, NotHTML, parse_html
from calibre.ebooks.oeb.polish.errors import DRMError, InvalidBook
from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
from calibre.ebooks.oeb.polish.utils import PositionFinder, CommentFinder, guess_type, parse_css from calibre.ebooks.oeb.polish.utils import (
from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER CommentFinder, PositionFinder, guess_type, parse_css
)
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.utils.filenames import nlinks_file, hardlink_file from calibre.utils.filenames import hardlink_file, nlinks_file
from calibre.utils.ipc.simple_worker import fork_job, WorkerError from calibre.utils.ipc.simple_worker import WorkerError, fork_job
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
exists, join, relpath = os.path.exists, os.path.join, os.path.relpath exists, join, relpath = os.path.exists, os.path.join, os.path.relpath
OEB_FONTS = {guess_type('a.ttf'), guess_type('b.otf'), guess_type('a.woff'), 'application/x-font-ttf', 'application/x-font-otf', 'application/font-sfnt'} OEB_FONTS = {guess_type('a.ttf'), guess_type('b.otf'), guess_type('a.woff'), 'application/x-font-ttf', 'application/x-font-otf', 'application/font-sfnt'}
OPF_NAMESPACES = {'opf':OPF2_NS, 'dc':DC11_NS} OPF_NAMESPACES = {'opf':OPF2_NS, 'dc':DC11_NS}
@ -319,6 +332,16 @@ class Container(ContainerBase): # {{{
all_names = {self.href_to_name(x.get('href'), self.opf_name) for x in self.opf_xpath('//opf:manifest/opf:item[@href]')} all_names = {self.href_to_name(x.get('href'), self.opf_name) for x in self.opf_xpath('//opf:manifest/opf:item[@href]')}
return name in all_names return name in all_names
def make_name_unique(self, name):
    ''' Return a variant of `name` that is not already in use.

    A name counts as in use when either
    :meth:`has_name_case_insensitive` or :meth:`manifest_has_name`
    reports it. If `name` is free it is returned unchanged. Otherwise
    ``-1``, ``-2``, ... is inserted before the file extension until a
    free name is found, for example ``added.css`` -> ``added-1.css``.

    :param name: The candidate name to check.
    :return: `name` itself, or the first free numbered variant of it.
    '''
    def in_use(candidate):
        return self.has_name_case_insensitive(candidate) or self.manifest_has_name(candidate)

    if not in_use(name):
        return name
    # Split once, up front. splitext only treats a dot in the last path
    # component as the start of an extension, so extension-less names
    # ('mimetype') and dotted directories ('a.b/c') get the counter
    # appended at the end instead of being cut at the wrong dot.
    base, ext = os.path.splitext(name)
    for c in count(1):
        candidate = '%s-%d%s' % (base, c, ext)
        if not in_use(candidate):
            return candidate
def add_file(self, name, data, media_type=None, spine_index=None, modify_name_if_needed=False, process_manifest_item=None): def add_file(self, name, data, media_type=None, spine_index=None, modify_name_if_needed=False, process_manifest_item=None):
''' Add a file to this container. Entries for the file are ''' Add a file to this container. Entries for the file are
automatically created in the OPF manifest and spine automatically created in the OPF manifest and spine
@ -330,15 +353,8 @@ class Container(ContainerBase): # {{{
if not modify_name_if_needed: if not modify_name_if_needed:
raise ValueError(('A file with the name %s already exists' % name) if self.has_name_case_insensitive(name) else raise ValueError(('A file with the name %s already exists' % name) if self.has_name_case_insensitive(name) else
('An item with the href %s already exists in the manifest' % href)) ('An item with the href %s already exists in the manifest' % href))
base, ext = name.rpartition('.')[::2] name = self.make_name_unique(name)
c = 0 href = self.name_to_href(name, self.opf_name)
while True:
c += 1
q = '%s-%d.%s' % (base, c, ext)
href = self.name_to_href(q, self.opf_name)
if not self.has_name_case_insensitive(q) and not self.manifest_has_name(q):
name = q
break
path = self.name_to_abspath(name) path = self.name_to_abspath(name)
base = os.path.dirname(path) base = os.path.dirname(path)
if not os.path.exists(base): if not os.path.exists(base):
@ -871,6 +887,8 @@ class Container(ContainerBase): # {{{
generated item.''' generated item.'''
id_prefix = id_prefix or 'id' id_prefix = id_prefix or 'id'
media_type = media_type or guess_type(name) media_type = media_type or guess_type(name)
if unique_href:
name = self.make_name_unique(name)
href = self.name_to_href(name, self.opf_name) href = self.name_to_href(name, self.opf_name)
base, ext = href.rpartition('.')[0::2] base, ext = href.rpartition('.')[0::2]
all_ids = {x.get('id') for x in self.opf_xpath('//*[@id]')} all_ids = {x.get('id') for x in self.opf_xpath('//*[@id]')}
@ -880,15 +898,6 @@ class Container(ContainerBase): # {{{
c += 1 c += 1
item_id = id_prefix + '%d'%c item_id = id_prefix + '%d'%c
def exists(h):
n = self.href_to_name(h, self.opf_name)
return self.exists(n) or self.manifest_has_name(n)
if unique_href:
c = 0
while exists(href):
c += 1
href = '%s_%d.%s'%(base, c, ext)
manifest = self.opf_xpath('//opf:manifest')[0] manifest = self.opf_xpath('//opf:manifest')[0]
item = manifest.makeelement(OPF('item'), item = manifest.makeelement(OPF('item'),
id=item_id, href=href) id=item_id, href=href)

View File

@ -186,6 +186,9 @@ class ContainerTests(BaseTest):
self.assertEqual('xxx', c.raw_data(name)) self.assertEqual('xxx', c.raw_data(name))
self.assertIn(name, set(c.manifest_id_map.itervalues())) self.assertIn(name, set(c.manifest_id_map.itervalues()))
self.assertNotIn(name, {x[0] for x in c.spine_names}) self.assertNotIn(name, {x[0] for x in c.spine_names})
self.assertEqual(c.make_name_unique(name), 'added-1.css')
c.add_file('added-1.css', b'xxx')
self.assertEqual(c.make_name_unique(name.upper()), 'added-2.css'.upper())
self.check_links(c) self.check_links(c)