mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix index parsing of non-ASCII websites
This commit is contained in:
parent
366258b571
commit
3172795aa2
@ -265,10 +265,9 @@ class BasicNewsRecipe(object):
|
|||||||
raw = url_or_raw
|
raw = url_or_raw
|
||||||
if not isinstance(raw, unicode) and self.encoding:
|
if not isinstance(raw, unicode) and self.encoding:
|
||||||
raw = raw.decode(self.encoding)
|
raw = raw.decode(self.encoding)
|
||||||
raw = re.sub(r'&(\S+?);',
|
massage = list(BeautifulSoup.MARKUP_MASSAGE)
|
||||||
lambda match: entity_to_unicode(match, encoding=self.encoding),
|
massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
|
||||||
raw)
|
return BeautifulSoup(raw, markupMassage=massage)
|
||||||
return BeautifulSoup(raw)
|
|
||||||
|
|
||||||
|
|
||||||
def sort_index_by(self, index, weights):
|
def sort_index_by(self, index, weights):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user