py3: More fixes for news downloads

2025-07-09 03:04:10 -04:00 · 2019-04-23 16:04:20 +05:30 · 2019-04-23 16:04:20 +05:30 · c12c80e174
commit c12c80e174
parent bfbc31fa9f
2 changed files with 2 additions and 3 deletions
--- a/src/calibre/ebooks/readability/readability.py
+++ b/src/calibre/ebooks/readability/readability.py
@@ -8,7 +8,6 @@ from collections import defaultdict

 from polyglot.builtins import reraise, unicode_type

-from lxml.etree import tostring
 from lxml.html import (fragment_fromstring, document_fromstring,
        tostring as htostring)

@@ -315,7 +314,7 @@ class Document:
    def transform_misused_divs_into_paragraphs(self):
        for elem in self.tags(self.html, 'div'):
            # transform <div>s that do not contain other block elements into <p>s
-            if not REGEXES['divToPElementsRe'].search(unicode_type(''.join(map(tostring, list(elem))))):
+            if not REGEXES['divToPElementsRe'].search(unicode_type(''.join(map(tounicode, list(elem))))):
                # self.debug("Altering %s to p" % (describe(elem)))
                elem.tag = "p"
                # print "Fixed element "+describe(elem)
--- a/src/calibre/web/feeds/__init__.py
+++ b/src/calibre/web/feeds/__init__.py
@@ -341,7 +341,7 @@ def feed_from_xml(raw_xml, title=None, oldest_article=7,
    from calibre.web.feeds.feedparser import parse
    # Handle unclosed escaped entities. They trip up feedparser and HBR for one
    # generates them
-    raw_xml = re.sub(r'(&amp;#\d+)([^0-9;])', r'\1;\2', raw_xml)
+    raw_xml = re.sub(br'(&amp;#\d+)([^0-9;])', br'\1;\2', raw_xml)
    feed = parse(raw_xml)
    pfeed = Feed(get_article_url=get_article_url, log=log)
    pfeed.populate_from_feed(feed, title=title,