Fix conversion of MOBI files on Windows that contain a ':' in their titles

This commit is contained in:
Kovid Goyal 2008-12-29 12:51:09 -08:00
parent 53e2e7b314
commit 662a4641bf
4 changed files with 30 additions and 29 deletions

View File

@ -13,7 +13,8 @@ from calibre.startup import plugins, winutil, winutilerror
from calibre.constants import iswindows, isosx, islinux, isfrozen, \ from calibre.constants import iswindows, isosx, islinux, isfrozen, \
terminal_controller, preferred_encoding, \ terminal_controller, preferred_encoding, \
__appname__, __version__, __author__, \ __appname__, __version__, __author__, \
win32event, win32api, winerror, fcntl win32event, win32api, winerror, fcntl, \
filesystem_encoding
import mechanize import mechanize
mimetypes.add_type('application/epub+zip', '.epub') mimetypes.add_type('application/epub+zip', '.epub')
@ -41,6 +42,25 @@ def osx_version():
return int(m.group(1)), int(m.group(2)), int(m.group(3)) return int(m.group(1)), int(m.group(2)), int(m.group(3))
# Characters replaced by sanitize_file_name: \xae, NUL, backslash, | ? * < " : > + [ ] and /
_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+\[\]/]')
def sanitize_file_name(name, substitute='_'):
'''
Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
The set of invalid characters is the union of the invalid characters in Windows,
OS X and Linux. Also removes leading and trailing whitespace, and converts every
remaining whitespace character to a plain space. A name consisting only of dots
is replaced by a single underscore.
**WARNING:** This function also replaces path separators, so only pass file names
and not full paths to it.
*NOTE:* This function always returns byte strings, not unicode objects. The byte strings
are encoded in the filesystem encoding of the platform, or UTF-8. Characters that
cannot be encoded are silently dropped.
'''
# Encode unicode input first so the substitutions below operate on byte strings
if isinstance(name, unicode):
name = name.encode(filesystem_encoding, 'ignore')
one = _filename_sanitize.sub(substitute, name)
# Turn tabs, newlines, etc. into spaces, then trim the ends
one = re.sub(r'\s', ' ', one).strip()
# A name made up solely of dots (e.g. '.' or '..') becomes '_'
return re.sub(r'^\.+$', '_', one)
class CommandLineError(Exception): class CommandLineError(Exception):
pass pass
@ -201,13 +221,6 @@ class CurrentDir(object):
def __exit__(self, *args): def __exit__(self, *args):
os.chdir(self.cwd) os.chdir(self.cwd)
def sanitize_file_name(name):
'''
Remove characters that are illegal in filenames from name.
Also remove path separators. All illegal characters are replaced by
underscores. A leading hyphen is replaced as well. Leading and trailing
whitespace is stripped first, and any remaining whitespace character is
converted to a plain space.
'''
# Inner sub: replace \xae " ' | ~ : ? \ / and a leading '-' with '_';
# outer sub: normalise every whitespace character to a space.
return re.sub(r'\s', ' ', re.sub(r'[\xae"\'\|\~\:\?\\\/]|^-', '_', name.strip()))
def detect_ncpus(): def detect_ncpus():
"""Detects the number of effective CPUs in the system""" """Detects the number of effective CPUs in the system"""

View File

@ -29,6 +29,10 @@ winerror = __import__('winerror') if iswindows else None
win32api = __import__('win32api') if iswindows else None win32api = __import__('win32api') if iswindows else None
fcntl = None if iswindows else __import__('fcntl') fcntl = None if iswindows else __import__('fcntl')
# Encoding used for file names; fall back to UTF-8 when the interpreter
# reports None for the filesystem encoding.
filesystem_encoding = sys.getfilesystemencoding()
if filesystem_encoding is None: filesystem_encoding = 'utf-8'
################################################################################ ################################################################################
plugins = None plugins = None
if plugins is None: if plugins is None:

View File

@ -22,6 +22,7 @@ from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf import OPFCreator from calibre.ebooks.metadata.opf import OPFCreator
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
from calibre import sanitize_file_name
class EXTHHeader(object): class EXTHHeader(object):
@ -200,7 +201,8 @@ class MobiReader(object):
guide = soup.find('guide') guide = soup.find('guide')
for elem in soup.findAll(['metadata', 'guide']): for elem in soup.findAll(['metadata', 'guide']):
elem.extract() elem.extract()
htmlfile = os.path.join(output_dir, self.name+'.html') htmlfile = os.path.join(output_dir,
sanitize_file_name(self.name)+'.html')
try: try:
for ref in guide.findAll('reference', href=True): for ref in guide.findAll('reference', href=True):
ref['href'] = os.path.basename(htmlfile)+ref['href'] ref['href'] = os.path.basename(htmlfile)+ref['href']

View File

@ -21,13 +21,12 @@ from calibre.library.sqlite import connect, IntegrityError
from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.search_query_parser import SearchQueryParser
from calibre.ebooks.metadata import string_to_authors, authors_to_string from calibre.ebooks.metadata import string_to_authors, authors_to_string
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
from calibre.constants import preferred_encoding, iswindows, isosx from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_encoding
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.customize.ui import run_plugins_on_import from calibre.customize.ui import run_plugins_on_import
from calibre import sanitize_file_name
copyfile = os.link if hasattr(os, 'link') else shutil.copyfile copyfile = os.link if hasattr(os, 'link') else shutil.copyfile
# Encoding used for file names; fall back to UTF-8 when the interpreter
# reports None for the filesystem encoding.
filesystem_encoding = sys.getfilesystemencoding()
if filesystem_encoding is None: filesystem_encoding = 'utf-8'
iscaseinsensitive = iswindows or isosx iscaseinsensitive = iswindows or isosx
def normpath(x): def normpath(x):
@ -37,23 +36,6 @@ def normpath(x):
x = x.lower() x = x.lower()
return x return x
# Characters replaced by sanitize_file_name: \xae, NUL, backslash, | ? * < " : > + [ ] and /
_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+\[\]/]')
def sanitize_file_name(name, substitute='_'):
'''
Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
The set of invalid characters is the union of the invalid characters in Windows,
OS X and Linux. Also removes leading and trailing whitespace, and converts every
remaining whitespace character to a plain space. A name consisting only of dots
is replaced by a single underscore.
**WARNING:** This function also replaces path separators, so only pass file names
and not full paths to it.
*NOTE:* This function always returns byte strings, not unicode objects. The byte strings
are encoded in the filesystem encoding of the platform, or UTF-8. Characters that
cannot be encoded are silently dropped.
'''
# Encode unicode input first so the substitutions below operate on byte strings
if isinstance(name, unicode):
name = name.encode(filesystem_encoding, 'ignore')
one = _filename_sanitize.sub(substitute, name)
# Turn tabs, newlines, etc. into spaces, then trim the ends
one = re.sub(r'\s', ' ', one).strip()
# A name made up solely of dots (e.g. '.' or '..') becomes '_'
return re.sub(r'^\.+$', '_', one)
FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5, FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'publisher':3, 'rating':4, 'timestamp':5,
'size':6, 'tags':7, 'comments':8, 'series':9, 'series_index':10, 'size':6, 'tags':7, 'comments':8, 'series':9, 'series_index':10,