mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
BeautifulSoup: Port fix from upstream for outputting bare ampersands in strings. Fixes #1769481 [Calibre inserting semicolons](https://bugs.launchpad.net/calibre/+bug/1769481)
This commit is contained in:
parent
073ce5522f
commit
6e91a17650
@@ -101,6 +101,14 @@ class PageElement:
|
|||||||
"""Contains the navigational information for some part of the page
|
"""Contains the navigational information for some part of the page
|
||||||
(either a tag or a piece of text)"""
|
(either a tag or a piece of text)"""
|
||||||
|
|
||||||
|
XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'",
|
||||||
|
"quot" : '"',
|
||||||
|
"amp" : "&",
|
||||||
|
"lt" : "<",
|
||||||
|
"gt" : ">" }
|
||||||
|
XML_SPECIAL_CHARS_TO_ENTITIES = {v: k for k, v in XML_ENTITIES_TO_SPECIAL_CHARS.items()}
|
||||||
|
|
||||||
|
|
||||||
def setup(self, parent=None, previous=None):
|
def setup(self, parent=None, previous=None):
|
||||||
"""Sets up the initial relations between this element and
|
"""Sets up the initial relations between this element and
|
||||||
other elements."""
|
other elements."""
|
||||||
@@ -338,31 +346,31 @@ class PageElement:
|
|||||||
#NavigableStrings and Tags.
|
#NavigableStrings and Tags.
|
||||||
def nextGenerator(self):
|
def nextGenerator(self):
|
||||||
i = self
|
i = self
|
||||||
while i:
|
while i is not None:
|
||||||
i = i.next
|
i = i.next
|
||||||
yield i
|
yield i
|
||||||
|
|
||||||
def nextSiblingGenerator(self):
|
def nextSiblingGenerator(self):
|
||||||
i = self
|
i = self
|
||||||
while i:
|
while i is not None:
|
||||||
i = i.nextSibling
|
i = i.nextSibling
|
||||||
yield i
|
yield i
|
||||||
|
|
||||||
def previousGenerator(self):
|
def previousGenerator(self):
|
||||||
i = self
|
i = self
|
||||||
while i:
|
while i is not None:
|
||||||
i = i.previous
|
i = i.previous
|
||||||
yield i
|
yield i
|
||||||
|
|
||||||
def previousSiblingGenerator(self):
|
def previousSiblingGenerator(self):
|
||||||
i = self
|
i = self
|
||||||
while i:
|
while i is not None:
|
||||||
i = i.previousSibling
|
i = i.previousSibling
|
||||||
yield i
|
yield i
|
||||||
|
|
||||||
def parentGenerator(self):
|
def parentGenerator(self):
|
||||||
i = self
|
i = self
|
||||||
while i:
|
while i is not None:
|
||||||
i = i.parent
|
i = i.parent
|
||||||
yield i
|
yield i
|
||||||
|
|
||||||
@@ -389,6 +397,16 @@ class PageElement:
|
|||||||
s = unicode(s)
|
s = unicode(s)
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
|
||||||
|
+ "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
|
||||||
|
+ ")")
|
||||||
|
|
||||||
|
def _sub_entity(self, x):
|
||||||
|
"""Used with a regular expression to substitute the
|
||||||
|
appropriate XML entity for an XML special character."""
|
||||||
|
return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
|
||||||
|
|
||||||
|
|
||||||
class NavigableString(unicode, PageElement):
|
class NavigableString(unicode, PageElement):
|
||||||
|
|
||||||
def __getnewargs__(self):
|
def __getnewargs__(self):
|
||||||
@@ -407,10 +425,12 @@ class NavigableString(unicode, PageElement):
|
|||||||
return unicode(str(self), DEFAULT_OUTPUT_ENCODING) # Changed by Kovid
|
return unicode(str(self), DEFAULT_OUTPUT_ENCODING) # Changed by Kovid
|
||||||
|
|
||||||
def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
|
def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
|
||||||
|
# Substitute outgoing XML entities.
|
||||||
|
data = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, self)
|
||||||
if encoding:
|
if encoding:
|
||||||
return self.encode(encoding)
|
return data.encode(encoding)
|
||||||
else:
|
else:
|
||||||
return self
|
return data
|
||||||
|
|
||||||
class CData(NavigableString):
|
class CData(NavigableString):
|
||||||
|
|
||||||
@@ -596,15 +616,6 @@ class Tag(PageElement):
|
|||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return self.__str__(None)
|
return self.__str__(None)
|
||||||
|
|
||||||
BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
|
|
||||||
+ "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
|
|
||||||
+ ")")
|
|
||||||
|
|
||||||
def _sub_entity(self, x):
|
|
||||||
"""Used with a regular expression to substitute the
|
|
||||||
appropriate XML entity for an XML special character."""
|
|
||||||
return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
|
|
||||||
|
|
||||||
def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING,
|
def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING,
|
||||||
prettyPrint=False, indentLevel=0):
|
prettyPrint=False, indentLevel=0):
|
||||||
"""Returns a string or Unicode representation of this tag and
|
"""Returns a string or Unicode representation of this tag and
|
||||||
|
Loading…
x
Reference in New Issue
Block a user