Intelligently convert (almost) all filenames to ASCII. This should make for more readable file names, as opposed to the previous practice of simply replacing Unicode characters with underscores.

This commit is contained in:
Kovid Goyal 2009-07-26 10:26:51 -06:00
parent 11068e0e09
commit 6cf006db05
8 changed files with 37 additions and 31 deletions

View File

@ -8,7 +8,7 @@ import os
import shutil import shutil
from itertools import cycle from itertools import cycle
from calibre import sanitize_file_name as sanitize from calibre.utils.filenames import ascii_filename as sanitize
from calibre.devices.usbms.driver import USBMS from calibre.devices.usbms.driver import USBMS
import calibre.devices.cybookg3.t2b as t2b import calibre.devices.cybookg3.t2b as t2b

View File

@ -8,7 +8,7 @@ import os, re, sys, shutil
from itertools import cycle from itertools import cycle
from calibre.devices.usbms.driver import USBMS from calibre.devices.usbms.driver import USBMS
from calibre import sanitize_file_name as sanitize from calibre.utils.filenames import ascii_filename as sanitize
from calibre.ebooks.metadata import string_to_authors from calibre.ebooks.metadata import string_to_authors
class JETBOOK(USBMS): class JETBOOK(USBMS):

View File

@ -21,7 +21,8 @@ except ImportError:
from lxml import html, etree from lxml import html, etree
from calibre import entity_to_unicode, sanitize_file_name from calibre import entity_to_unicode
from calibre.utils.filenames import ascii_filename
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks import DRMError from calibre.ebooks import DRMError
from calibre.ebooks.chardet import ENCODING_PATS from calibre.ebooks.chardet import ENCODING_PATS
@ -374,7 +375,7 @@ class MobiReader(object):
fname = self.name.encode('ascii', 'replace') fname = self.name.encode('ascii', 'replace')
fname = re.sub(r'[\x08\x15\0]+', '', fname) fname = re.sub(r'[\x08\x15\0]+', '', fname)
htmlfile = os.path.join(output_dir, htmlfile = os.path.join(output_dir,
sanitize_file_name(fname) + '.html') ascii_filename(fname) + '.html')
try: try:
for ref in guide.xpath('descendant::reference'): for ref in guide.xpath('descendant::reference'):
if ref.attrib.has_key('href'): if ref.attrib.has_key('href'):

View File

@ -57,6 +57,7 @@ it under the same terms as Perl itself.
import re import re
from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS
from calibre.constants import preferred_encoding
class Unidecoder(object): class Unidecoder(object):
@ -70,7 +71,10 @@ class Unidecoder(object):
try: try:
text = unicode(text) text = unicode(text)
except: except:
text = text.decode('utf-8', 'ignore') try:
text = text.decode(preferred_encoding)
except:
text = text.decode('utf-8', 'replace')
# Replace characters larger than 127 with their ASCII equivelent. # Replace characters larger than 127 with their ASCII equivelent.
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()), return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),
text) text)
@ -80,7 +84,7 @@ class Unidecoder(object):
Returns the replacement character or ? if none can be found. Returns the replacement character or ? if none can be found.
''' '''
try: try:
# Splite the unicode character xABCD into parts 0xAB and 0xCD. # Split the unicode character xABCD into parts 0xAB and 0xCD.
# 0xAB represents the group within CODEPOINTS to query and 0xCD # 0xAB represents the group within CODEPOINTS to query and 0xCD
# represents the position in the list of characters for the group. # represents the position in the list of characters for the group.
return CODEPOINTS[self.code_group(codepoint)][self.grouped_point( return CODEPOINTS[self.code_group(codepoint)][self.grouped_point(

View File

@ -21,7 +21,7 @@ from calibre.gui2 import config, error_dialog, Dispatcher, dynamic, \
pixmap_to_data, warning_dialog, \ pixmap_to_data, warning_dialog, \
question_dialog question_dialog
from calibre.ebooks.metadata import authors_to_string from calibre.ebooks.metadata import authors_to_string
from calibre import sanitize_file_name, preferred_encoding from calibre import preferred_encoding
from calibre.utils.filenames import ascii_filename from calibre.utils.filenames import ascii_filename
from calibre.devices.errors import FreeSpaceError from calibre.devices.errors import FreeSpaceError
from calibre.utils.smtp import compose_mail, sendmail, extract_email_address, \ from calibre.utils.smtp import compose_mail, sendmail, extract_email_address, \
@ -542,7 +542,7 @@ class DeviceGUI(object):
'\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' + \ '\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' + \
_('in the %s format.') % _('in the %s format.') %
os.path.splitext(f)[1][1:].upper()) os.path.splitext(f)[1][1:].upper())
prefix = sanitize_file_name(t+' - '+a) prefix = ascii_filename(t+' - '+a)
if not isinstance(prefix, unicode): if not isinstance(prefix, unicode):
prefix = prefix.decode(preferred_encoding, 'replace') prefix = prefix.decode(preferred_encoding, 'replace')
attachment_names.append(prefix + os.path.splitext(f)[1]) attachment_names.append(prefix + os.path.splitext(f)[1])
@ -693,7 +693,7 @@ class DeviceGUI(object):
rows_are_ids=True) rows_are_ids=True)
names = [] names = []
for mi in metadata: for mi in metadata:
prefix = sanitize_file_name(mi['title']) prefix = ascii_filename(mi['title'])
if not isinstance(prefix, unicode): if not isinstance(prefix, unicode):
prefix = prefix.decode(preferred_encoding, 'replace') prefix = prefix.decode(preferred_encoding, 'replace')
prefix = ascii_filename(prefix) prefix = ascii_filename(prefix)
@ -758,7 +758,7 @@ class DeviceGUI(object):
a = mi['authors'] a = mi['authors']
if not a: if not a:
a = _('Unknown') a = _('Unknown')
prefix = sanitize_file_name(t+' - '+a) prefix = ascii_filename(t+' - '+a)
if not isinstance(prefix, unicode): if not isinstance(prefix, unicode):
prefix = prefix.decode(preferred_encoding, 'replace') prefix = prefix.decode(preferred_encoding, 'replace')
prefix = ascii_filename(prefix) prefix = ascii_filename(prefix)

View File

@ -14,8 +14,9 @@ from PyQt4.Qt import Qt, SIGNAL, QObject, QCoreApplication, QUrl, QTimer, \
QMessageBox, QStackedLayout QMessageBox, QStackedLayout
from PyQt4.QtSvg import QSvgRenderer from PyQt4.QtSvg import QSvgRenderer
from calibre import __version__, __appname__, sanitize_file_name, \ from calibre import __version__, __appname__, \
iswindows, isosx, prints, patheq iswindows, isosx, prints, patheq
from calibre.utils.filenames import ascii_filename
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.config import prefs, dynamic from calibre.utils.config import prefs, dynamic
from calibre.utils.ipc.server import Server from calibre.utils.ipc.server import Server
@ -852,7 +853,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
def _files_added(self, paths=[], names=[], infos=[], on_card=None): def _files_added(self, paths=[], names=[], infos=[], on_card=None):
if paths: if paths:
self.upload_books(paths, self.upload_books(paths,
list(map(sanitize_file_name, names)), list(map(ascii_filename, names)),
infos, on_card=on_card) infos, on_card=on_card)
self.status_bar.showMessage( self.status_bar.showMessage(
_('Uploading books to device.'), 2000) _('Uploading books to device.'), 2000)

View File

@ -34,7 +34,7 @@ from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_e
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.customize.ui import run_plugins_on_import from calibre.customize.ui import run_plugins_on_import
from calibre import sanitize_file_name from calibre.utils.filenames import ascii_filename
from calibre.ebooks import BOOK_EXTENSIONS from calibre.ebooks import BOOK_EXTENSIONS
if iswindows: if iswindows:
@ -652,8 +652,8 @@ class LibraryDatabase2(LibraryDatabase):
authors = self.authors(id, index_is_id=True) authors = self.authors(id, index_is_id=True)
if not authors: if not authors:
authors = _('Unknown') authors = _('Unknown')
author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore') author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore') title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
path = author + '/' + title + ' (%d)'%id path = author + '/' + title + ' (%d)'%id
return path return path
@ -664,8 +664,8 @@ class LibraryDatabase2(LibraryDatabase):
authors = self.authors(id, index_is_id=True) authors = self.authors(id, index_is_id=True)
if not authors: if not authors:
authors = _('Unknown') authors = _('Unknown')
author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
name = title + ' - ' + author name = title + ' - ' + author
while name.endswith('.'): while name.endswith('.'):
name = name[:-1] name = name[:-1]
@ -1520,12 +1520,12 @@ class LibraryDatabase2(LibraryDatabase):
x['cover'] = os.path.join(path, 'cover.jpg') x['cover'] = os.path.join(path, 'cover.jpg')
if not self.has_cover(x['id'], index_is_id=True): if not self.has_cover(x['id'], index_is_id=True):
x['cover'] = None x['cover'] = None
path += os.sep + self.construct_file_name(record[FIELD_MAP['id']]) + '.%s'
formats = self.formats(record[FIELD_MAP['id']], index_is_id=True) formats = self.formats(record[FIELD_MAP['id']], index_is_id=True)
if formats: if formats:
for fmt in formats.split(','): for fmt in formats.split(','):
x['formats'].append(path%fmt.lower()) path = self.format_abspath(x['id'], fmt, index_is_id=True)
x['fmt_'+fmt.lower()] = path%fmt.lower() x['formats'].append(path)
x['fmt_'+fmt.lower()] = path
x['available_formats'] = [i.upper() for i in formats.split(',')] x['available_formats'] = [i.upper() for i in formats.split(',')]
return data return data
@ -1602,12 +1602,12 @@ books_series_link feeds
by_author[au] = [] by_author[au] = []
by_author[au].append(index) by_author[au].append(index)
for au in by_author.keys(): for au in by_author.keys():
apath = os.path.join(dir, sanitize_file_name(au)) apath = os.path.join(dir, ascii_filename(au))
if not single_dir and not os.path.exists(apath): if not single_dir and not os.path.exists(apath):
os.mkdir(apath) os.mkdir(apath)
for idx in by_author[au]: for idx in by_author[au]:
title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id)) title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id))
tpath = os.path.join(apath, sanitize_file_name(title)) tpath = os.path.join(apath, ascii_filename(title))
id = idx if index_is_id else self.id(idx) id = idx if index_is_id else self.id(idx)
id = str(id) id = str(id)
if not single_dir and not os.path.exists(tpath): if not single_dir and not os.path.exists(tpath):
@ -1621,10 +1621,10 @@ books_series_link feeds
mi.authors = [_('Unknown')] mi.authors = [_('Unknown')]
cdata = self.cover(int(id), index_is_id=True) cdata = self.cover(int(id), index_is_id=True)
if cdata is not None: if cdata is not None:
cname = sanitize_file_name(name)+'.jpg' cname = ascii_filename(name)+'.jpg'
open(os.path.join(base, cname), 'wb').write(cdata) open(os.path.join(base, cname), 'wb').write(cdata)
mi.cover = cname mi.cover = cname
with open(os.path.join(base, sanitize_file_name(name)+'.opf'), with open(os.path.join(base, ascii_filename(name)+'.opf'),
'wb') as f: 'wb') as f:
f.write(metadata_to_opf(mi)) f.write(metadata_to_opf(mi))
@ -1636,7 +1636,7 @@ books_series_link feeds
if not data: if not data:
continue continue
fname = name +'.'+fmt.lower() fname = name +'.'+fmt.lower()
fname = sanitize_file_name(fname) fname = ascii_filename(fname)
f = open(os.path.join(base, fname), 'w+b') f = open(os.path.join(base, fname), 'w+b')
f.write(data) f.write(data)
f.flush() f.flush()
@ -1671,7 +1671,7 @@ books_series_link feeds
if not au: if not au:
au = _('Unknown') au = _('Unknown')
fname = '%s - %s.%s'%(title, au, format.lower()) fname = '%s - %s.%s'%(title, au, format.lower())
fname = sanitize_file_name(fname) fname = ascii_filename(fname)
if not os.path.exists(dir): if not os.path.exists(dir):
os.makedirs(dir) os.makedirs(dir)
f = open(os.path.join(dir, fname), 'w+b') f = open(os.path.join(dir, fname), 'w+b')

View File

@ -14,8 +14,8 @@ from httplib import responses
from PIL import Image from PIL import Image
from cStringIO import StringIO from cStringIO import StringIO
from calibre import browser, sanitize_file_name, \ from calibre import browser, relpath, unicode_path
relpath, unicode_path from calibre.utils.filenames import ascii_filename
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.config import OptionParser from calibre.utils.config import OptionParser
@ -313,7 +313,7 @@ class RecursiveFetcher(object):
self.log.exception('Could not fetch image %s'% iurl) self.log.exception('Could not fetch image %s'% iurl)
continue continue
c += 1 c += 1
fname = sanitize_file_name('img'+str(c)+ext) fname = ascii_filename('img'+str(c)+ext)
if isinstance(fname, unicode): if isinstance(fname, unicode):
fname = fname.encode('ascii', 'replace') fname = fname.encode('ascii', 'replace')
imgpath = os.path.join(diskpath, fname+'.jpg') imgpath = os.path.join(diskpath, fname+'.jpg')
@ -416,7 +416,7 @@ class RecursiveFetcher(object):
if not isinstance(_fname, unicode): if not isinstance(_fname, unicode):
_fname.decode('latin1', 'replace') _fname.decode('latin1', 'replace')
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '') _fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
_fname = sanitize_file_name(_fname) _fname = ascii_filename(_fname)
_fname = os.path.splitext(_fname)[0]+'.xhtml' _fname = os.path.splitext(_fname)[0]+'.xhtml'
res = os.path.join(linkdiskpath, _fname) res = os.path.join(linkdiskpath, _fname)
self.downloaded_paths.append(res) self.downloaded_paths.append(res)