Fix bug in adding HTML files with non-ASCII characters in their metadata to the library. Also fix regression in lrf2lrs for LRF files with non-ASCII titles.

This commit is contained in:
Kovid Goyal 2008-12-30 21:24:46 -08:00
parent e05286e773
commit 5a0263bfd1
4 changed files with 14 additions and 5 deletions

View File

@ -44,7 +44,7 @@ def osx_version():
_filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+\[\]/]')
def sanitize_file_name(name, substitute='_'):
def sanitize_file_name(name, substitute='_', as_unicode=False):
'''
Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
The set of invalid characters is the union of the invalid characters in Windows,
@ -58,7 +58,10 @@ def sanitize_file_name(name, substitute='_'):
name = name.encode(filesystem_encoding, 'ignore')
one = _filename_sanitize.sub(substitute, name)
one = re.sub(r'\s', ' ', one).strip()
return re.sub(r'^\.+$', '_', one)
one = re.sub(r'^\.+$', '_', one)
if as_unicode:
one = one.decode(filesystem_encoding)
return one
class CommandLineError(Exception):

View File

@ -89,7 +89,7 @@ class LRFDocument(LRFMetaFile):
bookinfo += u'<FreeText reading="">%s</FreeText>\n</BookInfo>\n<DocInfo>\n'%(self.metadata.free_text,)
th = self.doc_info.thumbnail
if th:
prefix = sanitize_file_name(self.metadata.title)
prefix = sanitize_file_name(self.metadata.title, as_unicode=True)
bookinfo += u'<CThumbnail file="%s" />\n'%(prefix+'_thumbnail.'+self.doc_info.thumbnail_extension,)
open(prefix+'_thumbnail.'+self.doc_info.thumbnail_extension, 'wb').write(th)
bookinfo += u'<Language reading="">%s</Language>\n'%(self.doc_info.language,)

View File

@ -10,9 +10,10 @@ Try to read metadata from an HTML file.
import re
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.chardet import xml_to_unicode
def get_metadata(stream):
src = stream.read()
src = xml_to_unicode(stream.read())
# Title
title = None

View File

@ -1100,8 +1100,13 @@ class LibraryDatabase2(LibraryDatabase):
continue
series_index = 1 if mi.series_index is None else mi.series_index
aus = mi.author_sort if mi.author_sort else ', '.join(mi.authors)
title = mi.title
if isinstance(aus, str):
aus = aus.decode(preferred_encoding, 'replace')
if isinstance(title, str):
title = title.decode(preferred_encoding)
obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)',
(mi.title, uri, series_index, aus))
(title, uri, series_index, aus))
id = obj.lastrowid
self.data.books_added([id], self.conn)
ids.append(id)