Intelligently convert (almost) all filenames to ASCII. This should make for more readable file names as opposed to the previous practice of simply replacing unicode chracters with underscores.

This commit is contained in:
Kovid Goyal 2009-07-26 10:26:51 -06:00
parent 11068e0e09
commit 6cf006db05
8 changed files with 37 additions and 31 deletions

View File

@ -8,7 +8,7 @@ import os
import shutil
from itertools import cycle
from calibre import sanitize_file_name as sanitize
from calibre.utils.filenames import ascii_filename as sanitize
from calibre.devices.usbms.driver import USBMS
import calibre.devices.cybookg3.t2b as t2b

View File

@ -8,7 +8,7 @@ import os, re, sys, shutil
from itertools import cycle
from calibre.devices.usbms.driver import USBMS
from calibre import sanitize_file_name as sanitize
from calibre.utils.filenames import ascii_filename as sanitize
from calibre.ebooks.metadata import string_to_authors
class JETBOOK(USBMS):

View File

@ -21,7 +21,8 @@ except ImportError:
from lxml import html, etree
from calibre import entity_to_unicode, sanitize_file_name
from calibre import entity_to_unicode
from calibre.utils.filenames import ascii_filename
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks import DRMError
from calibre.ebooks.chardet import ENCODING_PATS
@ -374,7 +375,7 @@ class MobiReader(object):
fname = self.name.encode('ascii', 'replace')
fname = re.sub(r'[\x08\x15\0]+', '', fname)
htmlfile = os.path.join(output_dir,
sanitize_file_name(fname) + '.html')
ascii_filename(fname) + '.html')
try:
for ref in guide.xpath('descendant::reference'):
if ref.attrib.has_key('href'):

View File

@ -57,6 +57,7 @@ it under the same terms as Perl itself.
import re
from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS
from calibre.constants import preferred_encoding
class Unidecoder(object):
@ -70,7 +71,10 @@ class Unidecoder(object):
try:
text = unicode(text)
except:
text = text.decode('utf-8', 'ignore')
try:
text = text.decode(preferred_encoding)
except:
text = text.decode('utf-8', 'replace')
# Replace characters larger than 127 with their ASCII equivelent.
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),
text)
@ -80,7 +84,7 @@ class Unidecoder(object):
Returns the replacement character or ? if none can be found.
'''
try:
# Splite the unicode character xABCD into parts 0xAB and 0xCD.
# Split the unicode character xABCD into parts 0xAB and 0xCD.
# 0xAB represents the group within CODEPOINTS to query and 0xCD
# represents the position in the list of characters for the group.
return CODEPOINTS[self.code_group(codepoint)][self.grouped_point(

View File

@ -21,7 +21,7 @@ from calibre.gui2 import config, error_dialog, Dispatcher, dynamic, \
pixmap_to_data, warning_dialog, \
question_dialog
from calibre.ebooks.metadata import authors_to_string
from calibre import sanitize_file_name, preferred_encoding
from calibre import preferred_encoding
from calibre.utils.filenames import ascii_filename
from calibre.devices.errors import FreeSpaceError
from calibre.utils.smtp import compose_mail, sendmail, extract_email_address, \
@ -542,7 +542,7 @@ class DeviceGUI(object):
'\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' + \
_('in the %s format.') %
os.path.splitext(f)[1][1:].upper())
prefix = sanitize_file_name(t+' - '+a)
prefix = ascii_filename(t+' - '+a)
if not isinstance(prefix, unicode):
prefix = prefix.decode(preferred_encoding, 'replace')
attachment_names.append(prefix + os.path.splitext(f)[1])
@ -693,7 +693,7 @@ class DeviceGUI(object):
rows_are_ids=True)
names = []
for mi in metadata:
prefix = sanitize_file_name(mi['title'])
prefix = ascii_filename(mi['title'])
if not isinstance(prefix, unicode):
prefix = prefix.decode(preferred_encoding, 'replace')
prefix = ascii_filename(prefix)
@ -758,7 +758,7 @@ class DeviceGUI(object):
a = mi['authors']
if not a:
a = _('Unknown')
prefix = sanitize_file_name(t+' - '+a)
prefix = ascii_filename(t+' - '+a)
if not isinstance(prefix, unicode):
prefix = prefix.decode(preferred_encoding, 'replace')
prefix = ascii_filename(prefix)

View File

@ -14,8 +14,9 @@ from PyQt4.Qt import Qt, SIGNAL, QObject, QCoreApplication, QUrl, QTimer, \
QMessageBox, QStackedLayout
from PyQt4.QtSvg import QSvgRenderer
from calibre import __version__, __appname__, sanitize_file_name, \
from calibre import __version__, __appname__, \
iswindows, isosx, prints, patheq
from calibre.utils.filenames import ascii_filename
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.config import prefs, dynamic
from calibre.utils.ipc.server import Server
@ -852,7 +853,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
def _files_added(self, paths=[], names=[], infos=[], on_card=None):
if paths:
self.upload_books(paths,
list(map(sanitize_file_name, names)),
list(map(ascii_filename, names)),
infos, on_card=on_card)
self.status_bar.showMessage(
_('Uploading books to device.'), 2000)

View File

@ -34,7 +34,7 @@ from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_e
from calibre.ptempfile import PersistentTemporaryFile
from calibre.customize.ui import run_plugins_on_import
from calibre import sanitize_file_name
from calibre.utils.filenames import ascii_filename
from calibre.ebooks import BOOK_EXTENSIONS
if iswindows:
@ -652,8 +652,8 @@ class LibraryDatabase2(LibraryDatabase):
authors = self.authors(id, index_is_id=True)
if not authors:
authors = _('Unknown')
author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
path = author + '/' + title + ' (%d)'%id
return path
@ -664,8 +664,8 @@ class LibraryDatabase2(LibraryDatabase):
authors = self.authors(id, index_is_id=True)
if not authors:
authors = _('Unknown')
author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
name = title + ' - ' + author
while name.endswith('.'):
name = name[:-1]
@ -1520,12 +1520,12 @@ class LibraryDatabase2(LibraryDatabase):
x['cover'] = os.path.join(path, 'cover.jpg')
if not self.has_cover(x['id'], index_is_id=True):
x['cover'] = None
path += os.sep + self.construct_file_name(record[FIELD_MAP['id']]) + '.%s'
formats = self.formats(record[FIELD_MAP['id']], index_is_id=True)
if formats:
for fmt in formats.split(','):
x['formats'].append(path%fmt.lower())
x['fmt_'+fmt.lower()] = path%fmt.lower()
path = self.format_abspath(x['id'], fmt, index_is_id=True)
x['formats'].append(path)
x['fmt_'+fmt.lower()] = path
x['available_formats'] = [i.upper() for i in formats.split(',')]
return data
@ -1602,12 +1602,12 @@ books_series_link feeds
by_author[au] = []
by_author[au].append(index)
for au in by_author.keys():
apath = os.path.join(dir, sanitize_file_name(au))
apath = os.path.join(dir, ascii_filename(au))
if not single_dir and not os.path.exists(apath):
os.mkdir(apath)
for idx in by_author[au]:
title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id))
tpath = os.path.join(apath, sanitize_file_name(title))
tpath = os.path.join(apath, ascii_filename(title))
id = idx if index_is_id else self.id(idx)
id = str(id)
if not single_dir and not os.path.exists(tpath):
@ -1621,10 +1621,10 @@ books_series_link feeds
mi.authors = [_('Unknown')]
cdata = self.cover(int(id), index_is_id=True)
if cdata is not None:
cname = sanitize_file_name(name)+'.jpg'
cname = ascii_filename(name)+'.jpg'
open(os.path.join(base, cname), 'wb').write(cdata)
mi.cover = cname
with open(os.path.join(base, sanitize_file_name(name)+'.opf'),
with open(os.path.join(base, ascii_filename(name)+'.opf'),
'wb') as f:
f.write(metadata_to_opf(mi))
@ -1636,7 +1636,7 @@ books_series_link feeds
if not data:
continue
fname = name +'.'+fmt.lower()
fname = sanitize_file_name(fname)
fname = ascii_filename(fname)
f = open(os.path.join(base, fname), 'w+b')
f.write(data)
f.flush()
@ -1671,7 +1671,7 @@ books_series_link feeds
if not au:
au = _('Unknown')
fname = '%s - %s.%s'%(title, au, format.lower())
fname = sanitize_file_name(fname)
fname = ascii_filename(fname)
if not os.path.exists(dir):
os.makedirs(dir)
f = open(os.path.join(dir, fname), 'w+b')

View File

@ -14,8 +14,8 @@ from httplib import responses
from PIL import Image
from cStringIO import StringIO
from calibre import browser, sanitize_file_name, \
relpath, unicode_path
from calibre import browser, relpath, unicode_path
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser
@ -313,7 +313,7 @@ class RecursiveFetcher(object):
self.log.exception('Could not fetch image %s'% iurl)
continue
c += 1
fname = sanitize_file_name('img'+str(c)+ext)
fname = ascii_filename('img'+str(c)+ext)
if isinstance(fname, unicode):
fname = fname.encode('ascii', 'replace')
imgpath = os.path.join(diskpath, fname+'.jpg')
@ -416,7 +416,7 @@ class RecursiveFetcher(object):
if not isinstance(_fname, unicode):
_fname.decode('latin1', 'replace')
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
_fname = sanitize_file_name(_fname)
_fname = ascii_filename(_fname)
_fname = os.path.splitext(_fname)[0]+'.xhtml'
res = os.path.join(linkdiskpath, _fname)
self.downloaded_paths.append(res)