Fix #8115 (Unsupported encoding Windows-1251)

This commit is contained in:
Kovid Goyal 2010-12-30 12:01:35 -07:00
parent 5c38148245
commit cadbd6ba54
2 changed files with 8 additions and 1 deletion

View File

@@ -41,9 +41,12 @@ class FB2Input(InputFormatPlugin):
from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
from calibre.ebooks.chardet import xml_to_unicode
NAMESPACES = {'f':FB2NS, 'l':XLINK_NS} NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}
log.debug('Parsing XML...') log.debug('Parsing XML...')
raw = stream.read().replace('\0', '') raw = stream.read().replace('\0', '')
raw = xml_to_unicode(raw, strip_encoding_pats=True,
assume_utf8=True)[0]
try: try:
doc = etree.fromstring(raw) doc = etree.fromstring(raw)
except etree.XMLSyntaxError: except etree.XMLSyntaxError:

View File

@@ -9,6 +9,7 @@ import mimetypes, os
from base64 import b64decode from base64 import b64decode
from lxml import etree from lxml import etree
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.chardet import xml_to_unicode
XLINK_NS = 'http://www.w3.org/1999/xlink' XLINK_NS = 'http://www.w3.org/1999/xlink'
def XLINK(name): def XLINK(name):
@@ -23,7 +24,10 @@ def get_metadata(stream):
tostring = lambda x : etree.tostring(x, method='text', tostring = lambda x : etree.tostring(x, method='text',
encoding=unicode).strip() encoding=unicode).strip()
parser = etree.XMLParser(recover=True, no_network=True) parser = etree.XMLParser(recover=True, no_network=True)
root = etree.fromstring(stream.read(), parser=parser) raw = stream.read()
raw = xml_to_unicode(raw, strip_encoding_pats=True,
assume_utf8=True)[0]
root = etree.fromstring(raw, parser=parser)
authors, author_sort = [], None authors, author_sort = [], None
for au in XPath('//fb2:author')(root): for au in XPath('//fb2:author')(root):
fname = lname = author = None fname = lname = author = None