mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Amazon metadata download plugin to handle an amazon.com website change that was preventing any metadata from being downloaded.
Fixes a bug in the default html5lib lxml treebuilder that caused it to fail on pages containing comments whose text includes "--" or ends with a hyphen.
This commit is contained in:
parent
9a9e5d5d7b
commit
c68b9b7d64
@ -309,6 +309,11 @@ class TreeBuilder(_base.TreeBuilder):
|
|||||||
if (parent == self.document and
|
if (parent == self.document and
|
||||||
self.document._elementTree.getroot()[-1].tag == comment_type):
|
self.document._elementTree.getroot()[-1].tag == comment_type):
|
||||||
warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
|
warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
|
||||||
|
if data['data']:
|
||||||
|
# lxml cannot handle comment text that contains -- or endswith -
|
||||||
|
# Should really check if changes happened and issue a data loss
|
||||||
|
# warning, but that's a fairly big performance hit.
|
||||||
|
data['data'] = data['data'].replace('--', '\u2010\u2010').rstrip('-')
|
||||||
super(TreeBuilder, self).insertComment(data, parent)
|
super(TreeBuilder, self).insertComment(data, parent)
|
||||||
|
|
||||||
def insertRoot(self, token):
|
def insertRoot(self, token):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user