This commit is contained in:
Kovid Goyal 2011-06-12 15:36:49 -06:00
parent db595796fc
commit 0d5c964ea7
3 changed files with 6 additions and 3 deletions

View File

@ -6,7 +6,7 @@ class HBR(BasicNewsRecipe):
title = 'Harvard Business Review Blogs'
description = 'To subscribe go to http://hbr.harvardbusiness.org'
needs_subscription = True
__author__ = 'Kovid Goyal and Sujata Raman, enhanced by BrianG'
__author__ = 'Kovid Goyal, enhanced by BrianG'
language = 'en'
no_stylesheets = True

View File

@ -317,6 +317,9 @@ def feed_from_xml(raw_xml, title=None, oldest_article=7,
max_articles_per_feed=100,
get_article_url=lambda item: item.get('link', None),
log=default_log):
# Handle unterminated numeric character references (e.g. "&#8217" with no
# closing ";"). They trip up feedparser, and HBR, for one, generates them.
raw_xml = re.sub(r'(&#\d+)([^0-9;])', r'\1;\2', raw_xml)
feed = parse(raw_xml)
pfeed = Feed(get_article_url=get_article_url, log=log)
pfeed.populate_from_feed(feed, title=title,

View File

@ -13,8 +13,8 @@ from functools import partial
from contextlib import nested, closing
from calibre import browser, __appname__, iswindows, \
strftime, preferred_encoding, as_unicode
from calibre import (browser, __appname__, iswindows,
strftime, preferred_encoding, as_unicode)
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre import entity_to_unicode