mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix reading metadata from CHM files with non-ascii titles
This commit is contained in:
parent
d6b82e3648
commit
b36b552ec6
@ -6,13 +6,14 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re
|
import re, codecs
|
||||||
|
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.ebooks.metadata import string_to_authors, MetaInformation
|
from calibre.ebooks.metadata import string_to_authors, MetaInformation
|
||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
from calibre.ptempfile import TemporaryFile
|
from calibre.ptempfile import TemporaryFile
|
||||||
|
from calibre import force_unicode
|
||||||
|
|
||||||
def _clean(s):
|
def _clean(s):
|
||||||
return s.replace(u'\u00a0', u' ')
|
return s.replace(u'\u00a0', u' ')
|
||||||
@ -138,6 +139,13 @@ def get_metadata_from_reader(rdr):
|
|||||||
resolve_entities=True)[0])
|
resolve_entities=True)[0])
|
||||||
|
|
||||||
title = rdr.title
|
title = rdr.title
|
||||||
|
try:
|
||||||
|
x = rdr.GetEncoding()
|
||||||
|
codecs.lookup(x)
|
||||||
|
enc = x
|
||||||
|
except:
|
||||||
|
enc = 'cp1252'
|
||||||
|
title = force_unicode(title, enc)
|
||||||
authors = _get_authors(home)
|
authors = _get_authors(home)
|
||||||
mi = MetaInformation(title, authors)
|
mi = MetaInformation(title, authors)
|
||||||
publisher = _get_publisher(home)
|
publisher = _get_publisher(home)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user