From 0d5c964ea72635b0f1cdec0357abb61838d7f70d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 Jun 2011 15:36:49 -0600 Subject: [PATCH] ... --- recipes/hbr_blogs.recipe | 2 +- src/calibre/web/feeds/__init__.py | 3 +++ src/calibre/web/feeds/news.py | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/recipes/hbr_blogs.recipe b/recipes/hbr_blogs.recipe index bd72a95ebf..acee567d8d 100644 --- a/recipes/hbr_blogs.recipe +++ b/recipes/hbr_blogs.recipe @@ -6,7 +6,7 @@ class HBR(BasicNewsRecipe): title = 'Harvard Business Review Blogs' description = 'To subscribe go to http://hbr.harvardbusiness.org' needs_subscription = True - __author__ = 'Kovid Goyal and Sujata Raman, enhanced by BrianG' + __author__ = 'Kovid Goyal, enhanced by BrianG' language = 'en' no_stylesheets = True diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index a10fb03f91..dbd1f74f82 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -317,6 +317,9 @@ def feed_from_xml(raw_xml, title=None, oldest_article=7, max_articles_per_feed=100, get_article_url=lambda item: item.get('link', None), log=default_log): + # Handle unclosed escaped entities. 
They trip up feedparser, and HBR, for one, + # generates them + raw_xml = re.sub(r'(&#\d+)([^0-9;])', r'\1;\2', raw_xml) feed = parse(raw_xml) pfeed = Feed(get_article_url=get_article_url, log=log) pfeed.populate_from_feed(feed, title=title, diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 325fcf5209..c74a9b662c 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -13,8 +13,8 @@ from functools import partial from contextlib import nested, closing -from calibre import browser, __appname__, iswindows, \ - strftime, preferred_encoding, as_unicode +from calibre import (browser, __appname__, iswindows, + strftime, preferred_encoding, as_unicode) from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag from calibre.ebooks.metadata.opf2 import OPFCreator from calibre import entity_to_unicode