mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Intelligently convert (almost) all filenames to ASCII. This should make for more readable file names, as opposed to the previous practice of simply replacing Unicode characters with underscores.
This commit is contained in:
parent
11068e0e09
commit
6cf006db05
@ -8,7 +8,7 @@ import os
|
|||||||
import shutil
|
import shutil
|
||||||
from itertools import cycle
|
from itertools import cycle
|
||||||
|
|
||||||
from calibre import sanitize_file_name as sanitize
|
from calibre.utils.filenames import ascii_filename as sanitize
|
||||||
from calibre.devices.usbms.driver import USBMS
|
from calibre.devices.usbms.driver import USBMS
|
||||||
import calibre.devices.cybookg3.t2b as t2b
|
import calibre.devices.cybookg3.t2b as t2b
|
||||||
|
|
||||||
@ -98,7 +98,7 @@ class CYBOOKG3(USBMS):
|
|||||||
self.report_progress(i / float(len(files)), _('Transferring books to device...'))
|
self.report_progress(i / float(len(files)), _('Transferring books to device...'))
|
||||||
|
|
||||||
self.report_progress(1.0, _('Transferring books to device...'))
|
self.report_progress(1.0, _('Transferring books to device...'))
|
||||||
|
|
||||||
return zip(paths, cycle([on_card]))
|
return zip(paths, cycle([on_card]))
|
||||||
|
|
||||||
def delete_books(self, paths, end_session=True):
|
def delete_books(self, paths, end_session=True):
|
||||||
|
@ -8,7 +8,7 @@ import os, re, sys, shutil
|
|||||||
from itertools import cycle
|
from itertools import cycle
|
||||||
|
|
||||||
from calibre.devices.usbms.driver import USBMS
|
from calibre.devices.usbms.driver import USBMS
|
||||||
from calibre import sanitize_file_name as sanitize
|
from calibre.utils.filenames import ascii_filename as sanitize
|
||||||
from calibre.ebooks.metadata import string_to_authors
|
from calibre.ebooks.metadata import string_to_authors
|
||||||
|
|
||||||
class JETBOOK(USBMS):
|
class JETBOOK(USBMS):
|
||||||
|
@ -21,7 +21,8 @@ except ImportError:
|
|||||||
|
|
||||||
from lxml import html, etree
|
from lxml import html, etree
|
||||||
|
|
||||||
from calibre import entity_to_unicode, sanitize_file_name
|
from calibre import entity_to_unicode
|
||||||
|
from calibre.utils.filenames import ascii_filename
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.ebooks import DRMError
|
from calibre.ebooks import DRMError
|
||||||
from calibre.ebooks.chardet import ENCODING_PATS
|
from calibre.ebooks.chardet import ENCODING_PATS
|
||||||
@ -374,7 +375,7 @@ class MobiReader(object):
|
|||||||
fname = self.name.encode('ascii', 'replace')
|
fname = self.name.encode('ascii', 'replace')
|
||||||
fname = re.sub(r'[\x08\x15\0]+', '', fname)
|
fname = re.sub(r'[\x08\x15\0]+', '', fname)
|
||||||
htmlfile = os.path.join(output_dir,
|
htmlfile = os.path.join(output_dir,
|
||||||
sanitize_file_name(fname) + '.html')
|
ascii_filename(fname) + '.html')
|
||||||
try:
|
try:
|
||||||
for ref in guide.xpath('descendant::reference'):
|
for ref in guide.xpath('descendant::reference'):
|
||||||
if ref.attrib.has_key('href'):
|
if ref.attrib.has_key('href'):
|
||||||
|
@ -57,6 +57,7 @@ it under the same terms as Perl itself.
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS
|
from calibre.ebooks.unidecode.unicodepoints import CODEPOINTS
|
||||||
|
from calibre.constants import preferred_encoding
|
||||||
|
|
||||||
class Unidecoder(object):
|
class Unidecoder(object):
|
||||||
|
|
||||||
@ -70,7 +71,10 @@ class Unidecoder(object):
|
|||||||
try:
|
try:
|
||||||
text = unicode(text)
|
text = unicode(text)
|
||||||
except:
|
except:
|
||||||
text = text.decode('utf-8', 'ignore')
|
try:
|
||||||
|
text = text.decode(preferred_encoding)
|
||||||
|
except:
|
||||||
|
text = text.decode('utf-8', 'replace')
|
||||||
# Replace characters larger than 127 with their ASCII equivelent.
|
# Replace characters larger than 127 with their ASCII equivelent.
|
||||||
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),
|
return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),
|
||||||
text)
|
text)
|
||||||
@ -80,7 +84,7 @@ class Unidecoder(object):
|
|||||||
Returns the replacement character or ? if none can be found.
|
Returns the replacement character or ? if none can be found.
|
||||||
'''
|
'''
|
||||||
try:
|
try:
|
||||||
# Splite the unicode character xABCD into parts 0xAB and 0xCD.
|
# Split the unicode character xABCD into parts 0xAB and 0xCD.
|
||||||
# 0xAB represents the group within CODEPOINTS to query and 0xCD
|
# 0xAB represents the group within CODEPOINTS to query and 0xCD
|
||||||
# represents the position in the list of characters for the group.
|
# represents the position in the list of characters for the group.
|
||||||
return CODEPOINTS[self.code_group(codepoint)][self.grouped_point(
|
return CODEPOINTS[self.code_group(codepoint)][self.grouped_point(
|
||||||
|
@ -21,7 +21,7 @@ from calibre.gui2 import config, error_dialog, Dispatcher, dynamic, \
|
|||||||
pixmap_to_data, warning_dialog, \
|
pixmap_to_data, warning_dialog, \
|
||||||
question_dialog
|
question_dialog
|
||||||
from calibre.ebooks.metadata import authors_to_string
|
from calibre.ebooks.metadata import authors_to_string
|
||||||
from calibre import sanitize_file_name, preferred_encoding
|
from calibre import preferred_encoding
|
||||||
from calibre.utils.filenames import ascii_filename
|
from calibre.utils.filenames import ascii_filename
|
||||||
from calibre.devices.errors import FreeSpaceError
|
from calibre.devices.errors import FreeSpaceError
|
||||||
from calibre.utils.smtp import compose_mail, sendmail, extract_email_address, \
|
from calibre.utils.smtp import compose_mail, sendmail, extract_email_address, \
|
||||||
@ -542,7 +542,7 @@ class DeviceGUI(object):
|
|||||||
'\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' + \
|
'\n\n' + t + '\n\t' + _('by') + ' ' + a + '\n\n' + \
|
||||||
_('in the %s format.') %
|
_('in the %s format.') %
|
||||||
os.path.splitext(f)[1][1:].upper())
|
os.path.splitext(f)[1][1:].upper())
|
||||||
prefix = sanitize_file_name(t+' - '+a)
|
prefix = ascii_filename(t+' - '+a)
|
||||||
if not isinstance(prefix, unicode):
|
if not isinstance(prefix, unicode):
|
||||||
prefix = prefix.decode(preferred_encoding, 'replace')
|
prefix = prefix.decode(preferred_encoding, 'replace')
|
||||||
attachment_names.append(prefix + os.path.splitext(f)[1])
|
attachment_names.append(prefix + os.path.splitext(f)[1])
|
||||||
@ -693,7 +693,7 @@ class DeviceGUI(object):
|
|||||||
rows_are_ids=True)
|
rows_are_ids=True)
|
||||||
names = []
|
names = []
|
||||||
for mi in metadata:
|
for mi in metadata:
|
||||||
prefix = sanitize_file_name(mi['title'])
|
prefix = ascii_filename(mi['title'])
|
||||||
if not isinstance(prefix, unicode):
|
if not isinstance(prefix, unicode):
|
||||||
prefix = prefix.decode(preferred_encoding, 'replace')
|
prefix = prefix.decode(preferred_encoding, 'replace')
|
||||||
prefix = ascii_filename(prefix)
|
prefix = ascii_filename(prefix)
|
||||||
@ -758,7 +758,7 @@ class DeviceGUI(object):
|
|||||||
a = mi['authors']
|
a = mi['authors']
|
||||||
if not a:
|
if not a:
|
||||||
a = _('Unknown')
|
a = _('Unknown')
|
||||||
prefix = sanitize_file_name(t+' - '+a)
|
prefix = ascii_filename(t+' - '+a)
|
||||||
if not isinstance(prefix, unicode):
|
if not isinstance(prefix, unicode):
|
||||||
prefix = prefix.decode(preferred_encoding, 'replace')
|
prefix = prefix.decode(preferred_encoding, 'replace')
|
||||||
prefix = ascii_filename(prefix)
|
prefix = ascii_filename(prefix)
|
||||||
|
@ -14,8 +14,9 @@ from PyQt4.Qt import Qt, SIGNAL, QObject, QCoreApplication, QUrl, QTimer, \
|
|||||||
QMessageBox, QStackedLayout
|
QMessageBox, QStackedLayout
|
||||||
from PyQt4.QtSvg import QSvgRenderer
|
from PyQt4.QtSvg import QSvgRenderer
|
||||||
|
|
||||||
from calibre import __version__, __appname__, sanitize_file_name, \
|
from calibre import __version__, __appname__, \
|
||||||
iswindows, isosx, prints, patheq
|
iswindows, isosx, prints, patheq
|
||||||
|
from calibre.utils.filenames import ascii_filename
|
||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
from calibre.utils.config import prefs, dynamic
|
from calibre.utils.config import prefs, dynamic
|
||||||
from calibre.utils.ipc.server import Server
|
from calibre.utils.ipc.server import Server
|
||||||
@ -852,7 +853,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
|
|||||||
def _files_added(self, paths=[], names=[], infos=[], on_card=None):
|
def _files_added(self, paths=[], names=[], infos=[], on_card=None):
|
||||||
if paths:
|
if paths:
|
||||||
self.upload_books(paths,
|
self.upload_books(paths,
|
||||||
list(map(sanitize_file_name, names)),
|
list(map(ascii_filename, names)),
|
||||||
infos, on_card=on_card)
|
infos, on_card=on_card)
|
||||||
self.status_bar.showMessage(
|
self.status_bar.showMessage(
|
||||||
_('Uploading books to device.'), 2000)
|
_('Uploading books to device.'), 2000)
|
||||||
|
@ -34,7 +34,7 @@ from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_e
|
|||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
from calibre.customize.ui import run_plugins_on_import
|
from calibre.customize.ui import run_plugins_on_import
|
||||||
|
|
||||||
from calibre import sanitize_file_name
|
from calibre.utils.filenames import ascii_filename
|
||||||
from calibre.ebooks import BOOK_EXTENSIONS
|
from calibre.ebooks import BOOK_EXTENSIONS
|
||||||
|
|
||||||
if iswindows:
|
if iswindows:
|
||||||
@ -652,8 +652,8 @@ class LibraryDatabase2(LibraryDatabase):
|
|||||||
authors = self.authors(id, index_is_id=True)
|
authors = self.authors(id, index_is_id=True)
|
||||||
if not authors:
|
if not authors:
|
||||||
authors = _('Unknown')
|
authors = _('Unknown')
|
||||||
author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
|
author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
|
||||||
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
|
title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'ignore')
|
||||||
path = author + '/' + title + ' (%d)'%id
|
path = author + '/' + title + ' (%d)'%id
|
||||||
return path
|
return path
|
||||||
|
|
||||||
@ -664,8 +664,8 @@ class LibraryDatabase2(LibraryDatabase):
|
|||||||
authors = self.authors(id, index_is_id=True)
|
authors = self.authors(id, index_is_id=True)
|
||||||
if not authors:
|
if not authors:
|
||||||
authors = _('Unknown')
|
authors = _('Unknown')
|
||||||
author = sanitize_file_name(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
|
author = ascii_filename(authors.split(',')[0][:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
|
||||||
title = sanitize_file_name(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
|
title = ascii_filename(self.title(id, index_is_id=True)[:self.PATH_LIMIT]).decode(filesystem_encoding, 'replace')
|
||||||
name = title + ' - ' + author
|
name = title + ' - ' + author
|
||||||
while name.endswith('.'):
|
while name.endswith('.'):
|
||||||
name = name[:-1]
|
name = name[:-1]
|
||||||
@ -1520,12 +1520,12 @@ class LibraryDatabase2(LibraryDatabase):
|
|||||||
x['cover'] = os.path.join(path, 'cover.jpg')
|
x['cover'] = os.path.join(path, 'cover.jpg')
|
||||||
if not self.has_cover(x['id'], index_is_id=True):
|
if not self.has_cover(x['id'], index_is_id=True):
|
||||||
x['cover'] = None
|
x['cover'] = None
|
||||||
path += os.sep + self.construct_file_name(record[FIELD_MAP['id']]) + '.%s'
|
|
||||||
formats = self.formats(record[FIELD_MAP['id']], index_is_id=True)
|
formats = self.formats(record[FIELD_MAP['id']], index_is_id=True)
|
||||||
if formats:
|
if formats:
|
||||||
for fmt in formats.split(','):
|
for fmt in formats.split(','):
|
||||||
x['formats'].append(path%fmt.lower())
|
path = self.format_abspath(x['id'], fmt, index_is_id=True)
|
||||||
x['fmt_'+fmt.lower()] = path%fmt.lower()
|
x['formats'].append(path)
|
||||||
|
x['fmt_'+fmt.lower()] = path
|
||||||
x['available_formats'] = [i.upper() for i in formats.split(',')]
|
x['available_formats'] = [i.upper() for i in formats.split(',')]
|
||||||
|
|
||||||
return data
|
return data
|
||||||
@ -1602,12 +1602,12 @@ books_series_link feeds
|
|||||||
by_author[au] = []
|
by_author[au] = []
|
||||||
by_author[au].append(index)
|
by_author[au].append(index)
|
||||||
for au in by_author.keys():
|
for au in by_author.keys():
|
||||||
apath = os.path.join(dir, sanitize_file_name(au))
|
apath = os.path.join(dir, ascii_filename(au))
|
||||||
if not single_dir and not os.path.exists(apath):
|
if not single_dir and not os.path.exists(apath):
|
||||||
os.mkdir(apath)
|
os.mkdir(apath)
|
||||||
for idx in by_author[au]:
|
for idx in by_author[au]:
|
||||||
title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id))
|
title = re.sub(r'\s', ' ', self.title(idx, index_is_id=index_is_id))
|
||||||
tpath = os.path.join(apath, sanitize_file_name(title))
|
tpath = os.path.join(apath, ascii_filename(title))
|
||||||
id = idx if index_is_id else self.id(idx)
|
id = idx if index_is_id else self.id(idx)
|
||||||
id = str(id)
|
id = str(id)
|
||||||
if not single_dir and not os.path.exists(tpath):
|
if not single_dir and not os.path.exists(tpath):
|
||||||
@ -1621,10 +1621,10 @@ books_series_link feeds
|
|||||||
mi.authors = [_('Unknown')]
|
mi.authors = [_('Unknown')]
|
||||||
cdata = self.cover(int(id), index_is_id=True)
|
cdata = self.cover(int(id), index_is_id=True)
|
||||||
if cdata is not None:
|
if cdata is not None:
|
||||||
cname = sanitize_file_name(name)+'.jpg'
|
cname = ascii_filename(name)+'.jpg'
|
||||||
open(os.path.join(base, cname), 'wb').write(cdata)
|
open(os.path.join(base, cname), 'wb').write(cdata)
|
||||||
mi.cover = cname
|
mi.cover = cname
|
||||||
with open(os.path.join(base, sanitize_file_name(name)+'.opf'),
|
with open(os.path.join(base, ascii_filename(name)+'.opf'),
|
||||||
'wb') as f:
|
'wb') as f:
|
||||||
f.write(metadata_to_opf(mi))
|
f.write(metadata_to_opf(mi))
|
||||||
|
|
||||||
@ -1636,7 +1636,7 @@ books_series_link feeds
|
|||||||
if not data:
|
if not data:
|
||||||
continue
|
continue
|
||||||
fname = name +'.'+fmt.lower()
|
fname = name +'.'+fmt.lower()
|
||||||
fname = sanitize_file_name(fname)
|
fname = ascii_filename(fname)
|
||||||
f = open(os.path.join(base, fname), 'w+b')
|
f = open(os.path.join(base, fname), 'w+b')
|
||||||
f.write(data)
|
f.write(data)
|
||||||
f.flush()
|
f.flush()
|
||||||
@ -1671,7 +1671,7 @@ books_series_link feeds
|
|||||||
if not au:
|
if not au:
|
||||||
au = _('Unknown')
|
au = _('Unknown')
|
||||||
fname = '%s - %s.%s'%(title, au, format.lower())
|
fname = '%s - %s.%s'%(title, au, format.lower())
|
||||||
fname = sanitize_file_name(fname)
|
fname = ascii_filename(fname)
|
||||||
if not os.path.exists(dir):
|
if not os.path.exists(dir):
|
||||||
os.makedirs(dir)
|
os.makedirs(dir)
|
||||||
f = open(os.path.join(dir, fname), 'w+b')
|
f = open(os.path.join(dir, fname), 'w+b')
|
||||||
|
@ -14,8 +14,8 @@ from httplib import responses
|
|||||||
from PIL import Image
|
from PIL import Image
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
|
|
||||||
from calibre import browser, sanitize_file_name, \
|
from calibre import browser, relpath, unicode_path
|
||||||
relpath, unicode_path
|
from calibre.utils.filenames import ascii_filename
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.utils.config import OptionParser
|
from calibre.utils.config import OptionParser
|
||||||
@ -313,7 +313,7 @@ class RecursiveFetcher(object):
|
|||||||
self.log.exception('Could not fetch image %s'% iurl)
|
self.log.exception('Could not fetch image %s'% iurl)
|
||||||
continue
|
continue
|
||||||
c += 1
|
c += 1
|
||||||
fname = sanitize_file_name('img'+str(c)+ext)
|
fname = ascii_filename('img'+str(c)+ext)
|
||||||
if isinstance(fname, unicode):
|
if isinstance(fname, unicode):
|
||||||
fname = fname.encode('ascii', 'replace')
|
fname = fname.encode('ascii', 'replace')
|
||||||
imgpath = os.path.join(diskpath, fname+'.jpg')
|
imgpath = os.path.join(diskpath, fname+'.jpg')
|
||||||
@ -416,7 +416,7 @@ class RecursiveFetcher(object):
|
|||||||
if not isinstance(_fname, unicode):
|
if not isinstance(_fname, unicode):
|
||||||
_fname.decode('latin1', 'replace')
|
_fname.decode('latin1', 'replace')
|
||||||
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
|
_fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
|
||||||
_fname = sanitize_file_name(_fname)
|
_fname = ascii_filename(_fname)
|
||||||
_fname = os.path.splitext(_fname)[0]+'.xhtml'
|
_fname = os.path.splitext(_fname)[0]+'.xhtml'
|
||||||
res = os.path.join(linkdiskpath, _fname)
|
res = os.path.join(linkdiskpath, _fname)
|
||||||
self.downloaded_paths.append(res)
|
self.downloaded_paths.append(res)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user