Fix #1012903 (Updated recipe for The Christian Science Monitor)

2025-07-09 03:04:10 -04:00 · 2012-06-14 09:07:21 +05:30 · 2012-06-14 09:07:21 +05:30 · 462945fd39
commit 462945fd39
parent 219de5c4ea
1 changed files with 8 additions and 5 deletions
--- a/recipes/chr_mon.recipe
+++ b/recipes/chr_mon.recipe
@@ -4,6 +4,7 @@ __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
 www.csmonitor.com
 '''
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class CSMonitor(BasicNewsRecipe):
@@ -40,13 +41,15 @@ class CSMonitor(BasicNewsRecipe):
    remove_tags = [
                     dict(name=['meta','link','iframe','object','embed'])
-                    ,dict(attrs={'class':['podStoryRel','bottom-rel','hide']})
+                    ,dict(attrs={'class':re.compile('(^|| )podStoryRel($|| )', re.DOTALL)})
+                    ,dict(attrs={'class':['bottom-rel','hide']})
                    ,dict(attrs={'id':['pgallerycarousel_enlarge','pgallerycarousel_related']})
                  ]
    keep_only_tags = [
                        dict(name='h1', attrs={'class':'head'})
                       ,dict(name='h2', attrs={'class':'subhead'})
-                       ,dict(attrs={'class':['sByline','podStoryGal','ui-body-header','sBody']})
+                       ,dict(attrs={'class':['sByline','thePhoto','ui-body-header']})
+                       ,dict(attrs={'class':re.compile('(^|| )sBody($|| )', re.DOTALL)})
                     ]
    remove_attributes=['xmlns:fb']
@@ -74,10 +77,10 @@ class CSMonitor(BasicNewsRecipe):
           if nexttag:
              nurl = 'http://www.csmonitor.com' + nexttag['href']
              soup2 = self.index_to_soup(nurl)
-              texttag = soup2.find(attrs={'class':'sBody'})
+              texttag = soup2.find(attrs={'class':re.compile('(^|| )sBody($|| )', re.DOTALL)})
              if texttag:
-                  appendtag = soup.find(attrs={'class':'sBody'})
+                  appendtag = soup.find(attrs={'class':re.compile('(^|| )sBody($|| )', re.DOTALL)})
-                  for citem in texttag.findAll(attrs={'class':['podStoryRel','bottom-rel','hide']}):
+                  for citem in texttag.findAll(attrs={'class':[re.compile('(^|| )podStoryRel($|| )', re.DOTALL),'bottom-rel','hide']}):
                      citem.extract()
                  self.append_page(soup2)
                  texttag.extract()