Fix conversion of MOBI files on Windows that contain a ':' in their titles

2025-07-09 03:04:10 -04:00 · 2008-12-29 12:51:09 -08:00 · 2008-12-29 12:51:09 -08:00 · 662a4641bf
commit 662a4641bf
parent 53e2e7b314
4 changed files with 30 additions and 29 deletions
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -13,7 +13,8 @@ from calibre.startup import plugins, winutil, winutilerror
 from calibre.constants import iswindows, isosx, islinux, isfrozen, \
                              terminal_controller, preferred_encoding, \
                              __appname__, __version__, __author__, \
-                              win32event, win32api, winerror, fcntl
+                              win32event, win32api, winerror, fcntl, \
+                              filesystem_encoding
 import mechanize

 mimetypes.add_type('application/epub+zip', '.epub')
@@ -41,6 +42,25 @@ def osx_version():
            return int(m.group(1)), int(m.group(2)), int(m.group(3))


+_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+\[\]/]')
+
+def sanitize_file_name(name, substitute='_'):
+    '''
+    Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
+    The set of invalid characters is the union of the invalid characters in Windows,
+    OS X and Linux. Also removes leading and trailing whitespace.
+    **WARNING:** This function also replaces path separators, so only pass file names
+    and not full paths to it.
+    *NOTE:* This function always returns byte strings, not unicode objects. The byte strings
+    are encoded in the filesystem encoding of the platform, or UTF-8. 
+    '''
+    if isinstance(name, unicode):
+        name = name.encode(filesystem_encoding, 'ignore')
+    one = _filename_sanitize.sub(substitute, name)
+    one = re.sub(r'\s', ' ', one).strip()
+    return re.sub(r'^\.+$', '_', one)
+
+
 class CommandLineError(Exception):
    pass

@@ -201,13 +221,6 @@ class CurrentDir(object):
    def __exit__(self, *args):
        os.chdir(self.cwd)

-def sanitize_file_name(name):
-    '''
-    Remove characters that are illegal in filenames from name.
-    Also remove path separators. All illegal characters are replaced by
-    underscores.
-    '''
-    return re.sub(r'\s', ' ', re.sub(r'[\xae"\'\|\~\:\?\\\/]|^-', '_', name.strip()))

 def detect_ncpus():
    """Detects the number of effective CPUs in the system"""
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -29,6 +29,10 @@ winerror   = __import__('winerror') if iswindows else None
 win32api   = __import__('win32api') if iswindows else None
 fcntl      = None if iswindows else __import__('fcntl')

+filesystem_encoding = sys.getfilesystemencoding()
+if filesystem_encoding is None: filesystem_encoding = 'utf-8'
+
+
 ################################################################################
 plugins = None
 if plugins is None:
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -22,6 +22,7 @@ from calibre.ebooks.mobi.langcodes import main_language, sub_language
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf import OPFCreator
 from calibre.ebooks.metadata.toc import TOC
+from calibre import sanitize_file_name

 class EXTHHeader(object):
    
@@ -200,7 +201,8 @@ class MobiReader(object):
        guide = soup.find('guide')
        for elem in soup.findAll(['metadata', 'guide']):
            elem.extract()
-        htmlfile = os.path.join(output_dir, self.name+'.html')
+        htmlfile = os.path.join(output_dir, 
+                                sanitize_file_name(self.name)+'.html')
        try:
            for ref in guide.findAll('reference', href=True):
                ref['href'] = os.path.basename(htmlfile)+ref['href']
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -21,13 +21,12 @@ from calibre.library.sqlite import connect, IntegrityError
 from calibre.utils.search_query_parser import SearchQueryParser
 from calibre.ebooks.metadata import string_to_authors, authors_to_string
 from calibre.ebooks.metadata.meta import get_metadata
-from calibre.constants import preferred_encoding, iswindows, isosx
+from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_encoding
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.customize.ui import run_plugins_on_import
+from calibre import sanitize_file_name

 copyfile = os.link if hasattr(os, 'link') else shutil.copyfile
-filesystem_encoding = sys.getfilesystemencoding()
-if filesystem_encoding is None: filesystem_encoding = 'utf-8'
 iscaseinsensitive = iswindows or isosx

 def normpath(x):
@@ -37,23 +36,6 @@ def normpath(x):
        x = x.lower()
    return x

-_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+\[\]/]')
-
-def sanitize_file_name(name, substitute='_'):
-    '''
-    Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
-    The set of invalid characters is the union of the invalid characters in Windows,
-    OS X and Linux. Also removes leading an trailing whitespace.
-    **WARNING:** This function also replaces path separators, so only pass file names
-    and not full paths to it.
-    *NOTE:* This function always returns byte strings, not unicode objects. The byte strings
-    are encoded in the filesystem encoding of the platform, or UTF-8. 
-    '''
-    if isinstance(name, unicode):
-        name = name.encode(filesystem_encoding, 'ignore')
-    one = _filename_sanitize.sub(substitute, name)
-    one = re.sub(r'\s', ' ', one).strip()
-    return re.sub(r'^\.+$', '_', one)

 FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5, 
             'size':6, 'tags':7, 'comments':8, 'series':9, 'series_index':10,