mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Ignore DataLossWarnings
This commit is contained in:
parent
62ebb5e3f6
commit
03a39f15d1
@ -6,14 +6,14 @@ from __future__ import (unicode_literals, division, absolute_import,
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import copy, re
|
import copy, re, warnings
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from lxml.etree import ElementBase, XMLParser, ElementDefaultClassLookup, CommentBase
|
from lxml.etree import ElementBase, XMLParser, ElementDefaultClassLookup, CommentBase
|
||||||
|
|
||||||
from html5lib.constants import namespaces, tableInsertModeElements
|
from html5lib.constants import namespaces, tableInsertModeElements
|
||||||
from html5lib.treebuilders._base import TreeBuilder as BaseTreeBuilder
|
from html5lib.treebuilders._base import TreeBuilder as BaseTreeBuilder
|
||||||
from html5lib.ihatexml import InfosetFilter
|
from html5lib.ihatexml import InfosetFilter, DataLossWarning
|
||||||
from html5lib.html5parser import HTMLParser
|
from html5lib.html5parser import HTMLParser
|
||||||
|
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
@ -357,11 +357,12 @@ def parse(raw, decoder=None, log=None):
|
|||||||
raw = xml_to_unicode(raw)[0] if decoder is None else decoder(raw)
|
raw = xml_to_unicode(raw)[0] if decoder is None else decoder(raw)
|
||||||
# TODO: Replace entities?
|
# TODO: Replace entities?
|
||||||
raw = fix_self_closing_cdata_tags(raw) # TODO: Handle this in the parser
|
raw = fix_self_closing_cdata_tags(raw) # TODO: Handle this in the parser
|
||||||
# TODO: ignore warnings
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
parser = HTMLParser(tree=TreeBuilder)
|
parser = HTMLParser(tree=TreeBuilder)
|
||||||
parser.parse(raw, parseMeta=False, useChardet=False)
|
with warnings.catch_warnings():
|
||||||
|
warnings.simplefilter('ignore', category=DataLossWarning)
|
||||||
|
parser.parse(raw, parseMeta=False, useChardet=False)
|
||||||
except NamespacedHTMLPresent as err:
|
except NamespacedHTMLPresent as err:
|
||||||
raw = re.sub(r'<\s*/{0,1}(%s:)' % err.prefix, lambda m: m.group().replace(m.group(1), ''), raw, flags=re.I)
|
raw = re.sub(r'<\s*/{0,1}(%s:)' % err.prefix, lambda m: m.group().replace(m.group(1), ''), raw, flags=re.I)
|
||||||
continue
|
continue
|
||||||
|
Loading…
x
Reference in New Issue
Block a user