Fix index parsing of non-ASCII websites

2025-07-09 03:04:10 -04:00 · 2008-03-24 07:29:07 +00:00 · 2008-03-24 07:29:07 +00:00 · 3172795aa2
commit 3172795aa2
parent 366258b571
1 changed files with 3 additions and 4 deletions
--- a/src/libprs500/web/feeds/news.py
+++ b/src/libprs500/web/feeds/news.py
@@ -265,10 +265,9 @@ class BasicNewsRecipe(object):
            raw = url_or_raw
        if not isinstance(raw, unicode) and self.encoding:
            raw = raw.decode(self.encoding)
-        raw = re.sub(r'&(\S+?);', 
+        massage = list(BeautifulSoup.MARKUP_MASSAGE)
-                     lambda match: entity_to_unicode(match, encoding=self.encoding), 
+        massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding))) 
-                     raw)
+        return BeautifulSoup(raw, markupMassage=massage)
-        return BeautifulSoup(raw)
    def sort_index_by(self, index, weights):