Update Chicago Tribune

2025-08-11 09:13:57 -04:00 · 2014-01-02 11:06:30 +05:30 · 2014-01-02 11:06:30 +05:30 · db07598b69
commit db07598b69
parent bea02304ee
1 changed file with 17 additions and 17 deletions
--- a/recipes/chicago_tribune.recipe
+++ b/recipes/chicago_tribune.recipe
@@ -3,13 +3,12 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import urllib, re
 from calibre.web.feeds.news import BasicNewsRecipe

 class ChicagoTribune(BasicNewsRecipe):

    title       = 'Chicago Tribune'
-    __author__  = 'Kovid Goyal and Sujata Raman, a.peter'
+    __author__  = 'Kovid Goyal, Sujata Raman and a.peter'
    description = 'Politics, local and business news from Chicago'
    language    = 'en'
    version     = 2
@@ -23,11 +22,16 @@ class ChicagoTribune(BasicNewsRecipe):
                      dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}),
                           ]
    remove_tags_after = [{'class':['photo_article',]}]
+    remove_empty_feeds = True

    match_regexps = [r'page=[0-9]+']

-    remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer",'gallery-subcontent','subFooter']},
-                   {'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent",'nextgen-share-tools','outbrainTools', 'google-ad-story-bottom']},
+    remove_tags = [
+        {'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet",
+            "relatedrailcontent","div-wrapper","beta","atp-comments","footer",'gallery-subcontent','subFooter']},
+        {'class':["clearfix","relatedTitle","articleRelates module",
+                  "asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground", 'ndn_embed',
+                  "clearfix fullSpan brownBackground","curvedContent",'nextgen-share-tools','outbrainTools', 'google-ad-story-bottom']},
                   dict(name='font',attrs={'id':["cr-other-headlines"]})]
    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
@@ -51,7 +55,7 @@ class ChicagoTribune(BasicNewsRecipe):
             ('Most E-mailed stories', 'http://feeds.chicagotribune.com/chicagotribune/email/'),
             ('Opinion', 'http://feeds.chicagotribune.com/chicagotribune/opinion/'),
             ('Off Topic', 'http://feeds.chicagotribune.com/chicagotribune/offtopic/'),
-             #('Politics', 'http://feeds.chicagotribune.com/chicagotribune/politics/'),
+             ('Politics', 'http://feeds.feedburner.com/chicagotribune/cloutstreet/'),
             #('Special Reports', 'http://feeds.chicagotribune.com/chicagotribune/special/'),
             #('Religion News', 'http://feeds.chicagotribune.com/chicagotribune/religion/'),
             ('Business news', 'http://feeds.chicagotribune.com/chicagotribune/business/'),
@@ -76,19 +80,15 @@ class ChicagoTribune(BasicNewsRecipe):
             ('Julie\'s Health Club', 'http://feeds.chicagotribune.com/chicagotribune_julieshealthclub/'),
             ]

-
    def get_article_url(self, article):
-        ans = None
-        try:
-            s = article.summary
-            ans = urllib.unquote(
-                re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
-        except:
-            pass
-        if ans is None:
-            ans = article.get('feedburner_origlink', article.get('guid', article.get('link')))
-        if ans is not None:
-            return ans.replace('?track=rss', '')
+        url = BasicNewsRecipe.get_article_url(self, article)
+        url = url.split('/')[-2]
+        encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
+                '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
+                    'www.', '0I': '_', '0H': ','}
+        for k, v in encoding.iteritems():
+            url = url.replace(k, v)
+        return url.partition('?')[0]

    def skip_ad_pages(self, soup):
        text = soup.find(text='click here to continue to article')