Fix conversion of MOBI files on Windows that contain a ':' in their titles

2025-07-09 03:04:10 -04:00 · 2008-12-29 12:51:09 -08:00 · 2008-12-29 12:51:09 -08:00 · 662a4641bf
commit 662a4641bf
parent 53e2e7b314
4 changed files with 30 additions and 29 deletions
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@ -13,7 +13,8 @@ from calibre.startup import plugins, winutil, winutilerror
 from calibre.constants import iswindows, isosx, islinux, isfrozen, \
                              terminal_controller, preferred_encoding, \
                              __appname__, __version__, __author__, \
-                              win32event, win32api, winerror, fcntl
+                              win32event, win32api, winerror, fcntl, \
                              filesystem_encoding
 import mechanize
 mimetypes.add_type('application/epub+zip', '.epub')
@ -41,6 +42,25 @@ def osx_version():
            return int(m.group(1)), int(m.group(2)), int(m.group(3))
 # Characters that get replaced when building a file name (see sanitize_file_name).
 _filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+\[\]/]')
 def sanitize_file_name(name, substitute='_'):
    '''
    Return a version of `name` that is safe to use as a file name.

    Every character matched by `_filename_sanitize` is replaced by `substitute`,
    each whitespace character becomes a plain space, leading and trailing
    whitespace is stripped, and a name made up solely of dots is turned into `_`.

    **WARNING:** Path separators are among the replaced characters, so pass only
    a single file name to this function, never a full path.

    *NOTE:* unicode input is first encoded with `filesystem_encoding` (characters
    that cannot be encoded are dropped), so a byte string is returned. Byte string
    input is processed as is.
    '''
    raw = name
    if isinstance(raw, unicode):
        raw = raw.encode(filesystem_encoding, 'ignore')
    # NOTE(review): the substitution runs on the encoded bytes, so in a multi-byte
    # encoding a byte that happens to equal one of the listed characters (e.g. 0xAE
    # inside a UTF-8 sequence) would be replaced as well -- confirm whether this matters.
    replaced = _filename_sanitize.sub(substitute, raw)
    spaced = re.sub(r'\s', ' ', replaced)
    trimmed = spaced.strip()
    # A name consisting only of dots is replaced by a single underscore.
    return re.sub(r'^\.+$', '_', trimmed)
 class CommandLineError(Exception):
    '''
    Exception subclass that adds no behaviour of its own.

    NOTE(review): presumably raised to report invalid command line usage --
    confirm against the callers, which are not visible here.
    '''
    pass
@ -201,13 +221,6 @@ class CurrentDir(object):
    def __exit__(self, *args):
        # Context-manager exit hook: change the process working directory to
        # the path stored in self.cwd. The exception details passed in *args
        # are ignored.
        # NOTE(review): self.cwd is presumably recorded when the context is
        # entered -- that code is not visible here.
        os.chdir(self.cwd)
 def sanitize_file_name(name):
    '''
    Return `name` with characters that are illegal in file names replaced.

    Surrounding whitespace is stripped first. The characters matched by the
    pattern below (quotes, `|`, `~`, `:`, `?`, both path separators and the
    byte 0xAE), as well as a leading `-`, are each replaced by an underscore.
    Finally every remaining whitespace character is turned into a plain space.
    '''
    stripped = name.strip()
    underscored = re.sub(r'[\xae"\'\|\~\:\?\\\/]|^-', '_', stripped)
    return re.sub(r'\s', ' ', underscored)
 def detect_ncpus():
    """Detects the number of effective CPUs in the system"""
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -29,6 +29,10 @@ winerror   = __import__('winerror') if iswindows else None
 # win32api is imported only when `iswindows` is true, fcntl only otherwise;
 # the module that does not apply is set to None.
 if iswindows:
    win32api = __import__('win32api')
    fcntl = None
 else:
    win32api = None
    fcntl = __import__('fcntl')
 # Encoding used for file names; falls back to UTF-8 when Python reports none.
 filesystem_encoding = sys.getfilesystemencoding()
 if filesystem_encoding is None:
    filesystem_encoding = 'utf-8'
 ################################################################################
 plugins = None
 if plugins is None:
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -22,6 +22,7 @@ from calibre.ebooks.mobi.langcodes import main_language, sub_language
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf import OPFCreator
 from calibre.ebooks.metadata.toc import TOC
 from calibre import sanitize_file_name
 class EXTHHeader(object):
@ -200,7 +201,8 @@ class MobiReader(object):
        guide = soup.find('guide')
        for elem in soup.findAll(['metadata', 'guide']):
            elem.extract()
-        htmlfile = os.path.join(output_dir, self.name+'.html')
+        htmlfile = os.path.join(output_dir, 
                                sanitize_file_name(self.name)+'.html')
        try:
            for ref in guide.findAll('reference', href=True):
                ref['href'] = os.path.basename(htmlfile)+ref['href']
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@ -21,13 +21,12 @@ from calibre.library.sqlite import connect, IntegrityError
 from calibre.utils.search_query_parser import SearchQueryParser
 from calibre.ebooks.metadata import string_to_authors, authors_to_string
 from calibre.ebooks.metadata.meta import get_metadata
-from calibre.constants import preferred_encoding, iswindows, isosx
+from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_encoding
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.customize.ui import run_plugins_on_import
 from calibre import sanitize_file_name
 # Use os.link for copying where this Python provides it, otherwise fall back
 # to shutil.copyfile.
 if hasattr(os, 'link'):
    copyfile = os.link
 else:
    copyfile = shutil.copyfile
 # Encoding used for file names; falls back to UTF-8 when Python reports none.
 filesystem_encoding = sys.getfilesystemencoding()
 if filesystem_encoding is None:
    filesystem_encoding = 'utf-8'
 # True when either `iswindows` or `isosx` is set.
 iscaseinsensitive = iswindows or isosx
 def normpath(x):
@ -37,23 +36,6 @@ def normpath(x):
        x = x.lower()
    return x
 # Characters that get replaced when building a file name (see sanitize_file_name).
 _filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+\[\]/]')
 def sanitize_file_name(name, substitute='_'):
    '''
    Return a version of `name` that is safe to use as a file name.

    Every character matched by `_filename_sanitize` is replaced by `substitute`,
    each whitespace character becomes a plain space, leading and trailing
    whitespace is stripped, and a name made up solely of dots is turned into `_`.

    **WARNING:** Path separators are among the replaced characters, so pass only
    a single file name to this function, never a full path.

    *NOTE:* unicode input is first encoded with `filesystem_encoding` (characters
    that cannot be encoded are dropped), so a byte string is returned. Byte string
    input is processed as is.
    '''
    raw = name
    if isinstance(raw, unicode):
        raw = raw.encode(filesystem_encoding, 'ignore')
    # NOTE(review): the substitution runs on the encoded bytes, so in a multi-byte
    # encoding a byte that happens to equal one of the listed characters (e.g. 0xAE
    # inside a UTF-8 sequence) would be replaced as well -- confirm whether this matters.
    replaced = _filename_sanitize.sub(substitute, raw)
    spaced = re.sub(r'\s', ' ', replaced)
    trimmed = spaced.strip()
    # A name consisting only of dots is replaced by a single underscore.
    return re.sub(r'^\.+$', '_', trimmed)
 FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5, 
             'size':6, 'tags':7, 'comments':8, 'series':9, 'series_index':10,