diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index e1196ea176..e8d4b61ce1 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -44,7 +44,7 @@ def osx_version(): _filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+\[\]/]') -def sanitize_file_name(name, substitute='_'): +def sanitize_file_name(name, substitute='_', as_unicode=False): ''' Sanitize the filename `name`. All invalid characters are replaced by `substitute`. The set of invalid characters is the union of the invalid characters in Windows, @@ -58,7 +58,10 @@ def sanitize_file_name(name, substitute='_'): name = name.encode(filesystem_encoding, 'ignore') one = _filename_sanitize.sub(substitute, name) one = re.sub(r'\s', ' ', one).strip() - return re.sub(r'^\.+$', '_', one) + one = re.sub(r'^\.+$', '_', one) + if as_unicode: + one = one.decode(filesystem_encoding) + return one class CommandLineError(Exception): diff --git a/src/calibre/ebooks/lrf/lrfparser.py b/src/calibre/ebooks/lrf/lrfparser.py index f24b265a59..a80e06d8e3 100644 --- a/src/calibre/ebooks/lrf/lrfparser.py +++ b/src/calibre/ebooks/lrf/lrfparser.py @@ -89,7 +89,7 @@ class LRFDocument(LRFMetaFile): bookinfo += u'%s\n\n\n'%(self.metadata.free_text,) th = self.doc_info.thumbnail if th: - prefix = sanitize_file_name(self.metadata.title) + prefix = sanitize_file_name(self.metadata.title, as_unicode=True) bookinfo += u'\n'%(prefix+'_thumbnail.'+self.doc_info.thumbnail_extension,) open(prefix+'_thumbnail.'+self.doc_info.thumbnail_extension, 'wb').write(th) bookinfo += u'%s\n'%(self.doc_info.language,) diff --git a/src/calibre/ebooks/metadata/html.py b/src/calibre/ebooks/metadata/html.py index 8d61a746fa..c8f6c3ea16 100644 --- a/src/calibre/ebooks/metadata/html.py +++ b/src/calibre/ebooks/metadata/html.py @@ -10,9 +10,10 @@ Try to read metadata from an HTML file. import re from calibre.ebooks.metadata import MetaInformation +from calibre.ebooks.chardet import xml_to_unicode def get_metadata(stream): - src = stream.read() + src = xml_to_unicode(stream.read()) # Title title = None diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index b399cf46e9..0c77705a48 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -1100,8 +1100,13 @@ class LibraryDatabase2(LibraryDatabase): continue series_index = 1 if mi.series_index is None else mi.series_index aus = mi.author_sort if mi.author_sort else ', '.join(mi.authors) + title = mi.title + if isinstance(aus, str): + aus = aus.decode(preferred_encoding, 'replace') + if isinstance(title, str): + title = title.decode(preferred_encoding) obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)', - (mi.title, uri, series_index, aus)) + (title, uri, series_index, aus)) id = obj.lastrowid self.data.books_added([id], self.conn) ids.append(id)