diff --git a/src/calibre/ebooks/fb2/input.py b/src/calibre/ebooks/fb2/input.py index 2b08a716cc..1f9a3ffe95 100644 --- a/src/calibre/ebooks/fb2/input.py +++ b/src/calibre/ebooks/fb2/input.py @@ -41,9 +41,12 @@ class FB2Input(InputFormatPlugin): from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER + from calibre.ebooks.chardet import xml_to_unicode NAMESPACES = {'f':FB2NS, 'l':XLINK_NS} log.debug('Parsing XML...') raw = stream.read().replace('\0', '') + raw = xml_to_unicode(raw, strip_encoding_pats=True, + assume_utf8=True)[0] try: doc = etree.fromstring(raw) except etree.XMLSyntaxError: diff --git a/src/calibre/ebooks/metadata/fb2.py b/src/calibre/ebooks/metadata/fb2.py index 3636b89df4..2d6192f949 100644 --- a/src/calibre/ebooks/metadata/fb2.py +++ b/src/calibre/ebooks/metadata/fb2.py @@ -9,6 +9,7 @@ import mimetypes, os from base64 import b64decode from lxml import etree from calibre.ebooks.metadata import MetaInformation +from calibre.ebooks.chardet import xml_to_unicode XLINK_NS = 'http://www.w3.org/1999/xlink' def XLINK(name): @@ -23,7 +24,10 @@ def get_metadata(stream): tostring = lambda x : etree.tostring(x, method='text', encoding=unicode).strip() parser = etree.XMLParser(recover=True, no_network=True) - root = etree.fromstring(stream.read(), parser=parser) + raw = stream.read() + raw = xml_to_unicode(raw, strip_encoding_pats=True, + assume_utf8=True)[0] + root = etree.fromstring(raw, parser=parser) authors, author_sort = [], None for au in XPath('//fb2:author')(root): fname = lname = author = None