mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #1012903 (Updated recipe for The Christian Science Monitor)
This commit is contained in:
parent
219de5c4ea
commit
462945fd39
@ -4,6 +4,7 @@ __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
www.csmonitor.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class CSMonitor(BasicNewsRecipe):
|
||||
@ -40,13 +41,15 @@ class CSMonitor(BasicNewsRecipe):
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['meta','link','iframe','object','embed'])
|
||||
,dict(attrs={'class':['podStoryRel','bottom-rel','hide']})
|
||||
,dict(attrs={'class':re.compile('(^|| )podStoryRel($|| )', re.DOTALL)})
|
||||
,dict(attrs={'class':['bottom-rel','hide']})
|
||||
,dict(attrs={'id':['pgallerycarousel_enlarge','pgallerycarousel_related']})
|
||||
]
|
||||
keep_only_tags = [
|
||||
dict(name='h1', attrs={'class':'head'})
|
||||
,dict(name='h2', attrs={'class':'subhead'})
|
||||
,dict(attrs={'class':['sByline','podStoryGal','ui-body-header','sBody']})
|
||||
,dict(attrs={'class':['sByline','thePhoto','ui-body-header']})
|
||||
,dict(attrs={'class':re.compile('(^|| )sBody($|| )', re.DOTALL)})
|
||||
]
|
||||
remove_attributes=['xmlns:fb']
|
||||
|
||||
@ -74,10 +77,10 @@ class CSMonitor(BasicNewsRecipe):
|
||||
if nexttag:
|
||||
nurl = 'http://www.csmonitor.com' + nexttag['href']
|
||||
soup2 = self.index_to_soup(nurl)
|
||||
texttag = soup2.find(attrs={'class':'sBody'})
|
||||
texttag = soup2.find(attrs={'class':re.compile('(^|| )sBody($|| )', re.DOTALL)})
|
||||
if texttag:
|
||||
appendtag = soup.find(attrs={'class':'sBody'})
|
||||
for citem in texttag.findAll(attrs={'class':['podStoryRel','bottom-rel','hide']}):
|
||||
appendtag = soup.find(attrs={'class':re.compile('(^|| )sBody($|| )', re.DOTALL)})
|
||||
for citem in texttag.findAll(attrs={'class':[re.compile('(^|| )podStoryRel($|| )', re.DOTALL),'bottom-rel','hide']}):
|
||||
citem.extract()
|
||||
self.append_page(soup2)
|
||||
texttag.extract()
|
||||
|
Loading…
x
Reference in New Issue
Block a user