From 6cf006db0592f08e9967ad4fa42a9f467ed70947 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 26 Jul 2009 10:26:51 -0600 Subject: [PATCH] Intelligently convert (almost) all filenames to ASCII. This should make for more readable file names as opposed to the previous practice of simply replacing unicode characters with underscores. --- src/calibre/devices/cybookg3/driver.py | 4 ++-- src/calibre/devices/jetbook/driver.py | 2 +- src/calibre/ebooks/mobi/reader.py | 5 ++-- src/calibre/ebooks/unidecode/unidecoder.py | 8 +++++-- src/calibre/gui2/device.py | 8 +++---- src/calibre/gui2/main.py | 5 ++-- src/calibre/library/database2.py | 28 +++++++++++----------- src/calibre/web/fetch/simple.py | 8 +++---- 8 files changed, 37 insertions(+), 31 deletions(-) diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py index 9976a0f02e..a8ce905b00 100644 --- a/src/calibre/devices/cybookg3/driver.py +++ b/src/calibre/devices/cybookg3/driver.py @@ -8,7 +8,7 @@ import os import shutil from itertools import cycle -from calibre import sanitize_file_name as sanitize +from calibre.utils.filenames import ascii_filename as sanitize from calibre.devices.usbms.driver import USBMS import calibre.devices.cybookg3.t2b as t2b @@ -98,7 +98,7 @@ class CYBOOKG3(USBMS): self.report_progress(i / float(len(files)), _('Transferring books to device...')) self.report_progress(1.0, _('Transferring books to device...')) - + return zip(paths, cycle([on_card])) def delete_books(self, paths, end_session=True): diff --git a/src/calibre/devices/jetbook/driver.py b/src/calibre/devices/jetbook/driver.py index 9e91f4cfa9..c6668364a7 100644 --- a/src/calibre/devices/jetbook/driver.py +++ b/src/calibre/devices/jetbook/driver.py @@ -8,7 +8,7 @@ import os, re, sys, shutil from itertools import cycle from calibre.devices.usbms.driver import USBMS -from calibre import sanitize_file_name as sanitize +from calibre.utils.filenames import ascii_filename as sanitize from
calibre.ebooks.metadata import string_to_authors class JETBOOK(USBMS): diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 3d46668ee9..eed42bce46 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -21,7 +21,8 @@ except ImportError: from lxml import html, etree -from calibre import entity_to_unicode, sanitize_file_name +from calibre import entity_to_unicode +from calibre.utils.filenames import ascii_filename from calibre.ptempfile import TemporaryDirectory from calibre.ebooks import DRMError from calibre.ebooks.chardet import ENCODING_PATS @@ -374,7 +375,7 @@ class MobiReader(object): fname = self.name.encode('ascii', 'replace') fname = re.sub(r'[\x08\x15\0]+', '', fname) htmlfile = os.path.join(output_dir, - sanitize_file_name(fname) + '.html') + ascii_filename(fname) + '.html') try: for ref in guide.xpath('descendant::reference'): if ref.attrib.has_key('href'): diff --git a/src/calibre/ebooks/unidecode/unidecoder.py b/src/calibre/ebooks/unidecode/unidecoder.py index 8da60d29e9..d31239a1dc 100644 --- a/src/calibre/ebooks/unidecode/unidecoder.py +++ b/src/calibre/ebooks/unidecode/unidecoder.py @@ -57,6 +57,7 @@ it under the same terms as Perl itself. import re from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS +from calibre.constants import preferred_encoding class Unidecoder(object): @@ -70,7 +71,10 @@ class Unidecoder(object): try: text = unicode(text) except: - text = text.decode('utf-8', 'ignore') + try: + text = text.decode(preferred_encoding) + except: + text = text.decode('utf-8', 'replace') # Replace characters larger than 127 with their ASCII equivelent. return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()), text) @@ -80,7 +84,7 @@ class Unidecoder(object): Returns the replacement character or ? if none can be found. ''' try: - # Splite the unicode character xABCD into parts 0xAB and 0xCD. + # Split the unicode character xABCD into parts 0xAB and 0xCD. 
# 0xAB represents the group within CODEPOINTS to query and 0xCD # represents the position in the list of characters for the group. return CODEPOINTS[self.code_group(codepoint)][self.grouped_point( diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 19228afa92..ffbcb2e9e2 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -21,7 +21,7 @@ from calibre.gui2 import config, error_dialog, Dispatcher, dynamic, \ pixmap_to_data, warning_dialog, \ question_dialog from calibre.ebooks.metadata import authors_to_string -from calibre import sanitize_file_name, preferred_encoding +from calibre import preferred_encoding from calibre.utils.filenames import ascii_filename from calibre.devices.errors import FreeSpaceError from calibre.utils.smtp import compose_mail, sendmail, extract_email_address, \ @@ -542,7 +542,7 @@ class DeviceGUI(object): '\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' + \ _('in the %s format.') % os.path.splitext(f)[1][1:].upper()) - prefix = sanitize_file_name(t+' - '+a) + prefix = ascii_filename(t+' - '+a) if not isinstance(prefix, unicode): prefix = prefix.decode(preferred_encoding, 'replace') attachment_names.append(prefix + os.path.splitext(f)[1]) @@ -693,7 +693,7 @@ class DeviceGUI(object): rows_are_ids=True) names = [] for mi in metadata: - prefix = sanitize_file_name(mi['title']) + prefix = ascii_filename(mi['title']) if not isinstance(prefix, unicode): prefix = prefix.decode(preferred_encoding, 'replace') prefix = ascii_filename(prefix) @@ -758,7 +758,7 @@ class DeviceGUI(object): a = mi['authors'] if not a: a = _('Unknown') - prefix = sanitize_file_name(t+' - '+a) + prefix = ascii_filename(t+' - '+a) if not isinstance(prefix, unicode): prefix = prefix.decode(preferred_encoding, 'replace') prefix = ascii_filename(prefix) diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index b4b8494c64..5874e57599 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -14,8 +14,9 @@ from 
PyQt4.Qt import Qt, SIGNAL, QObject, QCoreApplication, QUrl, QTimer, \ QMessageBox, QStackedLayout from PyQt4.QtSvg import QSvgRenderer -from calibre import __version__, __appname__, sanitize_file_name, \ +from calibre import __version__, __appname__, \ iswindows, isosx, prints, patheq +from calibre.utils.filenames import ascii_filename from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.config import prefs, dynamic from calibre.utils.ipc.server import Server @@ -852,7 +853,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI): def _files_added(self, paths=[], names=[], infos=[], on_card=None): if paths: self.upload_books(paths, - list(map(sanitize_file_name, names)), + list(map(ascii_filename, names)), infos, on_card=on_card) self.status_bar.showMessage( _('Uploading books to device.'), 2000) diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index d7344b5681..c04e27aa75 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -34,7 +34,7 @@ from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_e from calibre.ptempfile import PersistentTemporaryFile from calibre.customize.ui import run_plugins_on_import -from calibre import sanitize_file_name +from calibre.utils.filenames import ascii_filename from calibre.ebooks import BOOK_EXTENSIONS if iswindows: @@ -652,8 +652,8 @@ class LibraryDatabase2(LibraryDatabase): authors = self.authors(id, index_is_id=True) if not authors: authors = _('Unknown') - author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore') - title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore') + author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore') + title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore') path = author + '/' + title 
+ ' (%d)'%id return path @@ -664,8 +664,8 @@ class LibraryDatabase2(LibraryDatabase): authors = self.authors(id, index_is_id=True) if not authors: authors = _('Unknown') - author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') - title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') + author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') + title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace') name = title + ' - ' + author while name.endswith('.'): name = name[:-1] @@ -1520,12 +1520,12 @@ class LibraryDatabase2(LibraryDatabase): x['cover'] = os.path.join(path, 'cover.jpg') if not self.has_cover(x['id'], index_is_id=True): x['cover'] = None - path += os.sep + self.construct_file_name(record[FIELD_MAP['id']]) + '.%s' formats = self.formats(record[FIELD_MAP['id']], index_is_id=True) if formats: for fmt in formats.split(','): - x['formats'].append(path%fmt.lower()) - x['fmt_'+fmt.lower()] = path%fmt.lower() + path = self.format_abspath(x['id'], fmt, index_is_id=True) + x['formats'].append(path) + x['fmt_'+fmt.lower()] = path x['available_formats'] = [i.upper() for i in formats.split(',')] return data @@ -1602,12 +1602,12 @@ books_series_link feeds by_author[au] = [] by_author[au].append(index) for au in by_author.keys(): - apath = os.path.join(dir, sanitize_file_name(au)) + apath = os.path.join(dir, ascii_filename(au)) if not single_dir and not os.path.exists(apath): os.mkdir(apath) for idx in by_author[au]: title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id)) - tpath = os.path.join(apath, sanitize_file_name(title)) + tpath = os.path.join(apath, ascii_filename(title)) id = idx if index_is_id else self.id(idx) id = str(id) if not single_dir and not os.path.exists(tpath): @@ -1621,10 +1621,10 @@ books_series_link feeds mi.authors = 
[_('Unknown')] cdata = self.cover(int(id), index_is_id=True) if cdata is not None: - cname = sanitize_file_name(name)+'.jpg' + cname = ascii_filename(name)+'.jpg' open(os.path.join(base, cname), 'wb').write(cdata) mi.cover = cname - with open(os.path.join(base, sanitize_file_name(name)+'.opf'), + with open(os.path.join(base, ascii_filename(name)+'.opf'), 'wb') as f: f.write(metadata_to_opf(mi)) @@ -1636,7 +1636,7 @@ books_series_link feeds if not data: continue fname = name +'.'+fmt.lower() - fname = sanitize_file_name(fname) + fname = ascii_filename(fname) f = open(os.path.join(base, fname), 'w+b') f.write(data) f.flush() @@ -1671,7 +1671,7 @@ books_series_link feeds if not au: au = _('Unknown') fname = '%s - %s.%s'%(title, au, format.lower()) - fname = sanitize_file_name(fname) + fname = ascii_filename(fname) if not os.path.exists(dir): os.makedirs(dir) f = open(os.path.join(dir, fname), 'w+b') diff --git a/src/calibre/web/fetch/simple.py b/src/calibre/web/fetch/simple.py index f956c4ee10..238ab343a9 100644 --- a/src/calibre/web/fetch/simple.py +++ b/src/calibre/web/fetch/simple.py @@ -14,8 +14,8 @@ from httplib import responses from PIL import Image from cStringIO import StringIO -from calibre import browser, sanitize_file_name, \ - relpath, unicode_path +from calibre import browser, relpath, unicode_path +from calibre.utils.filenames import ascii_filename from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.chardet import xml_to_unicode from calibre.utils.config import OptionParser @@ -313,7 +313,7 @@ class RecursiveFetcher(object): self.log.exception('Could not fetch image %s'% iurl) continue c += 1 - fname = sanitize_file_name('img'+str(c)+ext) + fname = ascii_filename('img'+str(c)+ext) if isinstance(fname, unicode): fname = fname.encode('ascii', 'replace') imgpath = os.path.join(diskpath, fname+'.jpg') @@ -416,7 +416,7 @@ class RecursiveFetcher(object): if not isinstance(_fname, unicode): _fname.decode('latin1', 'replace') _fname = 
_fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '') - _fname = sanitize_file_name(_fname) + _fname = ascii_filename(_fname) _fname = os.path.splitext(_fname)[0]+'.xhtml' res = os.path.join(linkdiskpath, _fname) self.downloaded_paths.append(res)