diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index f483902126..7d44bef8f7 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -13,7 +13,8 @@ from calibre.startup import plugins, winutil, winutilerror
 from calibre.constants import iswindows, isosx, islinux, isfrozen, \
                               terminal_controller, preferred_encoding, \
                               __appname__, __version__, __author__, \
-                              win32event, win32api, winerror, fcntl
+                              win32event, win32api, winerror, fcntl, \
+                              filesystem_encoding
 import mechanize
 
 mimetypes.add_type('application/epub+zip', '.epub')
@@ -41,6 +42,25 @@ def osx_version():
             return int(m.group(1)), int(m.group(2)), int(m.group(3))
 
 
+_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+\[\]/]')
+
+def sanitize_file_name(name, substitute='_'):
+    '''
+    Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
+    The set of invalid characters is the union of the invalid characters in Windows,
+    OS X and Linux. Also removes leading an trailing whitespace.
+    **WARNING:** This function also replaces path separators, so only pass file names
+    and not full paths to it.
+    *NOTE:* This function always returns byte strings, not unicode objects. The byte strings
+    are encoded in the filesystem encoding of the platform, or UTF-8.
+    '''
+    if isinstance(name, unicode):
+        name = name.encode(filesystem_encoding, 'ignore')
+    one = _filename_sanitize.sub(substitute, name)
+    one = re.sub(r'\s', ' ', one).strip()
+    return re.sub(r'^\.+$', '_', one)
+
+
 class CommandLineError(Exception):
     pass
 
@@ -201,13 +221,6 @@ class CurrentDir(object):
     def __exit__(self, *args):
         os.chdir(self.cwd)
 
-def sanitize_file_name(name):
-    '''
-    Remove characters that are illegal in filenames from name.
-    Also remove path separators. All illegal characters are replaced by
-    underscores.
-    '''
-    return re.sub(r'\s', ' ', re.sub(r'[\xae"\'\|\~\:\?\\\/]|^-', '_', name.strip()))
 
 def detect_ncpus():
     """Detects the number of effective CPUs in the system"""
diff --git a/src/calibre/constants.py b/src/calibre/constants.py
index bfbebd5273..7454c84dd9 100644
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -29,6 +29,10 @@ winerror = __import__('winerror') if iswindows else None
 win32api = __import__('win32api') if iswindows else None
 fcntl = None if iswindows else __import__('fcntl')
 
+filesystem_encoding = sys.getfilesystemencoding()
+if filesystem_encoding is None: filesystem_encoding = 'utf-8'
+
+
 ################################################################################
 plugins = None
 if plugins is None:
diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index 420159299e..5e553dcdd5 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -22,6 +22,7 @@ from calibre.ebooks.mobi.langcodes import main_language, sub_language
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.opf import OPFCreator
 from calibre.ebooks.metadata.toc import TOC
+from calibre import sanitize_file_name
 
 class EXTHHeader(object):
 
@@ -200,7 +201,8 @@ class MobiReader(object):
         guide = soup.find('guide')
         for elem in soup.findAll(['metadata', 'guide']):
             elem.extract()
-        htmlfile = os.path.join(output_dir, self.name+'.html')
+        htmlfile = os.path.join(output_dir,
+                                sanitize_file_name(self.name)+'.html')
         try:
             for ref in guide.findAll('reference', href=True):
                 ref['href'] = os.path.basename(htmlfile)+ref['href']
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 244ae72aeb..161442e840 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -21,13 +21,12 @@ from calibre.library.sqlite import connect, IntegrityError
 from calibre.utils.search_query_parser import SearchQueryParser
 from calibre.ebooks.metadata import string_to_authors, authors_to_string
 from calibre.ebooks.metadata.meta import get_metadata
-from calibre.constants import preferred_encoding, iswindows, isosx
+from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_encoding
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.customize.ui import run_plugins_on_import
+from calibre import sanitize_file_name
 
 copyfile = os.link if hasattr(os, 'link') else shutil.copyfile
-filesystem_encoding = sys.getfilesystemencoding()
-if filesystem_encoding is None: filesystem_encoding = 'utf-8'
 iscaseinsensitive = iswindows or isosx
 
 def normpath(x):
@@ -37,23 +36,6 @@ def normpath(x):
         x = x.lower()
     return x
 
-_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+\[\]/]')
-
-def sanitize_file_name(name, substitute='_'):
-    '''
-    Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
-    The set of invalid characters is the union of the invalid characters in Windows,
-    OS X and Linux. Also removes leading an trailing whitespace.
-    **WARNING:** This function also replaces path separators, so only pass file names
-    and not full paths to it.
-    *NOTE:* This function always returns byte strings, not unicode objects. The byte strings
-    are encoded in the filesystem encoding of the platform, or UTF-8.
-    '''
-    if isinstance(name, unicode):
-        name = name.encode(filesystem_encoding, 'ignore')
-    one = _filename_sanitize.sub(substitute, name)
-    one = re.sub(r'\s', ' ', one).strip()
-    return re.sub(r'^\.+$', '_', one)
 
 FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5,
              'size':6, 'tags':7, 'comments':8, 'series':9, 'series_index':10,