From c12c80e1746325ea5449f349906e2b1f74467746 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 23 Apr 2019 16:04:20 +0530 Subject: [PATCH] py3: More fixes for news downloads --- src/calibre/ebooks/readability/readability.py | 3 +-- src/calibre/web/feeds/__init__.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/readability/readability.py b/src/calibre/ebooks/readability/readability.py index b7cefa1ad9..98ad5af723 100644 --- a/src/calibre/ebooks/readability/readability.py +++ b/src/calibre/ebooks/readability/readability.py @@ -8,7 +8,6 @@ from collections import defaultdict from polyglot.builtins import reraise, unicode_type -from lxml.etree import tostring from lxml.html import (fragment_fromstring, document_fromstring, tostring as htostring) @@ -315,7 +314,7 @@ class Document: def transform_misused_divs_into_paragraphs(self): for elem in self.tags(self.html, 'div'): # transform
<div>s that do not contain other block elements into <p>s - if not REGEXES['divToPElementsRe'].search(unicode_type(''.join(map(tostring, list(elem))))): + if not REGEXES['divToPElementsRe'].search(unicode_type(''.join(map(tounicode, list(elem))))): # self.debug("Altering %s to p" % (describe(elem))) elem.tag = "p" # print "Fixed element "+describe(elem) diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index c0303e3e1c..3b42ec7f18 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -341,7 +341,7 @@ def feed_from_xml(raw_xml, title=None, oldest_article=7, from calibre.web.feeds.feedparser import parse # Handle unclosed escaped entities. They trip up feedparser and HBR for one # generates them - raw_xml = re.sub(r'(&#\d+)([^0-9;])', r'\1;\2', raw_xml) + raw_xml = re.sub(br'(&#\d+)([^0-9;])', br'\1;\2', raw_xml) feed = parse(raw_xml) pfeed = Feed(get_article_url=get_article_url, log=log) pfeed.populate_from_feed(feed, title=title,