diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index e1196ea176..e8d4b61ce1 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -44,7 +44,7 @@ def osx_version():
_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+\[\]/]')
-def sanitize_file_name(name, substitute='_'):
+def sanitize_file_name(name, substitute='_', as_unicode=False):
'''
Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
The set of invalid characters is the union of the invalid characters in Windows,
@@ -58,7 +58,10 @@ def sanitize_file_name(name, substitute='_'):
name = name.encode(filesystem_encoding, 'ignore')
one = _filename_sanitize.sub(substitute, name)
one = re.sub(r'\s', ' ', one).strip()
- return re.sub(r'^\.+$', '_', one)
+ one = re.sub(r'^\.+$', '_', one)
+ if as_unicode:
+ one = one.decode(filesystem_encoding)
+ return one
class CommandLineError(Exception):
diff --git a/src/calibre/ebooks/lrf/lrfparser.py b/src/calibre/ebooks/lrf/lrfparser.py
index f24b265a59..a80e06d8e3 100644
--- a/src/calibre/ebooks/lrf/lrfparser.py
+++ b/src/calibre/ebooks/lrf/lrfparser.py
@@ -89,7 +89,7 @@ class LRFDocument(LRFMetaFile):
bookinfo += u'%s\n\n\n'%(self.metadata.free_text,)
th = self.doc_info.thumbnail
if th:
- prefix = sanitize_file_name(self.metadata.title)
+ prefix = sanitize_file_name(self.metadata.title, as_unicode=True)
bookinfo += u'\n'%(prefix+'_thumbnail.'+self.doc_info.thumbnail_extension,)
open(prefix+'_thumbnail.'+self.doc_info.thumbnail_extension, 'wb').write(th)
bookinfo += u'%s\n'%(self.doc_info.language,)
diff --git a/src/calibre/ebooks/metadata/html.py b/src/calibre/ebooks/metadata/html.py
index 8d61a746fa..c8f6c3ea16 100644
--- a/src/calibre/ebooks/metadata/html.py
+++ b/src/calibre/ebooks/metadata/html.py
@@ -10,9 +10,10 @@ Try to read metadata from an HTML file.
import re
from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.chardet import xml_to_unicode
def get_metadata(stream):
- src = stream.read()
+ src = xml_to_unicode(stream.read())[0]  # xml_to_unicode returns (text, encoding); keep only the text
# Title
title = None
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index b399cf46e9..0c77705a48 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -1100,8 +1100,13 @@ class LibraryDatabase2(LibraryDatabase):
continue
series_index = 1 if mi.series_index is None else mi.series_index
aus = mi.author_sort if mi.author_sort else ', '.join(mi.authors)
+ title = mi.title
+ if isinstance(aus, str):
+ aus = aus.decode(preferred_encoding, 'replace')
+ if isinstance(title, str):
+ title = title.decode(preferred_encoding, 'replace')  # same policy as aus: never raise on undecodable bytes
obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)',
- (mi.title, uri, series_index, aus))
+ (title, uri, series_index, aus))
id = obj.lastrowid
self.data.books_added([id], self.conn)
ids.append(id)