Fix #8115 (Unsupported encoding Windows-1251)

This commit is contained in:
Kovid Goyal 2010-12-30 12:01:35 -07:00
parent 5c38148245
commit cadbd6ba54
2 changed files with 8 additions and 1 deletions

View File

@ -41,9 +41,12 @@ class FB2Input(InputFormatPlugin):
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
from calibre.ebooks.chardet import xml_to_unicode
NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}
log.debug('Parsing XML...')
raw = stream.read().replace('\0', '')
raw = xml_to_unicode(raw, strip_encoding_pats=True,
assume_utf8=True)[0]
try:
doc = etree.fromstring(raw)
except etree.XMLSyntaxError:

View File

@ -9,6 +9,7 @@ import mimetypes, os
from base64 import b64decode
from lxml import etree
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.chardet import xml_to_unicode
XLINK_NS = 'http://www.w3.org/1999/xlink'
def XLINK(name):
@ -23,7 +24,10 @@ def get_metadata(stream):
tostring = lambda x : etree.tostring(x, method='text',
encoding=unicode).strip()
parser = etree.XMLParser(recover=True, no_network=True)
root = etree.fromstring(stream.read(), parser=parser)
raw = stream.read()
raw = xml_to_unicode(raw, strip_encoding_pats=True,
assume_utf8=True)[0]
root = etree.fromstring(raw, parser=parser)
authors, author_sort = [], None
for au in XPath('//fb2:author')(root):
fname = lname = author = None