mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #1012903 (Updated recipe for The Christian Science Monitor)
This commit is contained in:
parent
219de5c4ea
commit
462945fd39
@@ -4,6 +4,7 @@ __copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
www.csmonitor.com
|
www.csmonitor.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class CSMonitor(BasicNewsRecipe):
|
class CSMonitor(BasicNewsRecipe):
|
||||||
@@ -40,13 +41,15 @@ class CSMonitor(BasicNewsRecipe):
|
|||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['meta','link','iframe','object','embed'])
|
dict(name=['meta','link','iframe','object','embed'])
|
||||||
,dict(attrs={'class':['podStoryRel','bottom-rel','hide']})
|
,dict(attrs={'class':re.compile('(^| )podStoryRel($| )', re.DOTALL)})
|
||||||
|
,dict(attrs={'class':['bottom-rel','hide']})
|
||||||
,dict(attrs={'id':['pgallerycarousel_enlarge','pgallerycarousel_related']})
|
,dict(attrs={'id':['pgallerycarousel_enlarge','pgallerycarousel_related']})
|
||||||
]
|
]
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h1', attrs={'class':'head'})
|
dict(name='h1', attrs={'class':'head'})
|
||||||
,dict(name='h2', attrs={'class':'subhead'})
|
,dict(name='h2', attrs={'class':'subhead'})
|
||||||
,dict(attrs={'class':['sByline','podStoryGal','ui-body-header','sBody']})
|
,dict(attrs={'class':['sByline','thePhoto','ui-body-header']})
|
||||||
|
,dict(attrs={'class':re.compile('(^| )sBody($| )', re.DOTALL)})
|
||||||
]
|
]
|
||||||
remove_attributes=['xmlns:fb']
|
remove_attributes=['xmlns:fb']
|
||||||
|
|
||||||
@@ -74,10 +77,10 @@ class CSMonitor(BasicNewsRecipe):
|
|||||||
if nexttag:
|
if nexttag:
|
||||||
nurl = 'http://www.csmonitor.com' + nexttag['href']
|
nurl = 'http://www.csmonitor.com' + nexttag['href']
|
||||||
soup2 = self.index_to_soup(nurl)
|
soup2 = self.index_to_soup(nurl)
|
||||||
texttag = soup2.find(attrs={'class':'sBody'})
|
texttag = soup2.find(attrs={'class':re.compile('(^| )sBody($| )', re.DOTALL)})
|
||||||
if texttag:
|
if texttag:
|
||||||
appendtag = soup.find(attrs={'class':'sBody'})
|
appendtag = soup.find(attrs={'class':re.compile('(^| )sBody($| )', re.DOTALL)})
|
||||||
for citem in texttag.findAll(attrs={'class':['podStoryRel','bottom-rel','hide']}):
|
for citem in texttag.findAll(attrs={'class':[re.compile('(^| )podStoryRel($| )', re.DOTALL),'bottom-rel','hide']}):
|
||||||
citem.extract()
|
citem.extract()
|
||||||
self.append_page(soup2)
|
self.append_page(soup2)
|
||||||
texttag.extract()
|
texttag.extract()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user