diff --git a/recipes/chicago_tribune.recipe b/recipes/chicago_tribune.recipe index 684993e251..b49aee4b5a 100644 --- a/recipes/chicago_tribune.recipe +++ b/recipes/chicago_tribune.recipe @@ -3,13 +3,12 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import urllib, re from calibre.web.feeds.news import BasicNewsRecipe class ChicagoTribune(BasicNewsRecipe): title = 'Chicago Tribune' - __author__ = 'Kovid Goyal and Sujata Raman, a.peter' + __author__ = 'Kovid Goyal, Sujata Raman and a.peter' description = 'Politics, local and business news from Chicago' language = 'en' version = 2 @@ -23,11 +22,16 @@ class ChicagoTribune(BasicNewsRecipe): dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}), ] remove_tags_after = [{'class':['photo_article',]}] + remove_empty_feeds = True match_regexps = [r'page=[0-9]+'] - remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer",'gallery-subcontent','subFooter']}, - {'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent",'nextgen-share-tools','outbrainTools', 'google-ad-story-bottom']}, + remove_tags = [ + {'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet", + "relatedrailcontent","div-wrapper","beta","atp-comments","footer",'gallery-subcontent','subFooter']}, + {'class':["clearfix","relatedTitle","articleRelates module", + "asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground", 'ndn_embed', + "clearfix fullSpan brownBackground","curvedContent",'nextgen-share-tools','outbrainTools', 'google-ad-story-bottom']}, dict(name='font',attrs={'id':["cr-other-headlines"]})] extra_css = ''' h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} @@ -51,7 +55,7 @@ class ChicagoTribune(BasicNewsRecipe): ('Most E-mailed stories', 'http://feeds.chicagotribune.com/chicagotribune/email/'), ('Opinion', 'http://feeds.chicagotribune.com/chicagotribune/opinion/'), ('Off Topic', 'http://feeds.chicagotribune.com/chicagotribune/offtopic/'), - #('Politics', 'http://feeds.chicagotribune.com/chicagotribune/politics/'), + ('Politics', 'http://feeds.feedburner.com/chicagotribune/cloutstreet/'), #('Special Reports', 'http://feeds.chicagotribune.com/chicagotribune/special/'), #('Religion News', 'http://feeds.chicagotribune.com/chicagotribune/religion/'), ('Business news', 'http://feeds.chicagotribune.com/chicagotribune/business/'), @@ -76,19 +80,15 @@ class ChicagoTribune(BasicNewsRecipe): ('Julie\'s Health Club', 'http://feeds.chicagotribune.com/chicagotribune_julieshealthclub/'), ] - def get_article_url(self, article): - ans = None - try: - s = article.summary - ans = urllib.unquote( - re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1)) - except: - pass - if ans is None: - ans = article.get('feedburner_origlink', article.get('guid', article.get('link'))) - if ans is not None: - return ans.replace('?track=rss', '') + url = BasicNewsRecipe.get_article_url(self, article) + url = url.split('/')[-2] + encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&', + '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S': + 'www.', '0I': '_', '0H': ','} + for k, v in encoding.iteritems(): + url = url.replace(k, v) + return url.partition('?')[0] def skip_ad_pages(self, soup): text = soup.find(text='click here to continue to article')