This commit is contained in:
Kovid Goyal 2012-06-07 12:21:38 +05:30
parent a7c509d9e7
commit 3212986d45
3 changed files with 6 additions and 7 deletions

View File

@@ -5,7 +5,6 @@ www.csmonitor.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class CSMonitor(BasicNewsRecipe):
title = 'The Christian Science Monitor - daily'
@@ -48,7 +47,7 @@ class CSMonitor(BasicNewsRecipe):
dict(name='h1', attrs={'class':'head'})
,dict(name='h2', attrs={'class':'subhead'})
,dict(attrs={'class':['sByline','podStoryGal','ui-body-header','sBody']})
]
]
remove_attributes=['xmlns:fb']
feeds = [
@@ -67,7 +66,7 @@ class CSMonitor(BasicNewsRecipe):
,(u'The Home Forum', u'http://rss.csmonitor.com/feeds/homeforum' )
,(u'Articles' , u'http://rss.csmonitor.com/feeds/csarticles' )
]
def append_page(self, soup):
pager = soup.find('div', attrs={'class':'navigation'})
if pager:
@@ -75,9 +74,9 @@ class CSMonitor(BasicNewsRecipe):
if nexttag:
nurl = 'http://www.csmonitor.com' + nexttag['href']
soup2 = self.index_to_soup(nurl)
texttag = soup2.find(attrs={'class':'sBody'})
texttag = soup2.find(attrs={'class':'sBody'})
if texttag:
appendtag = soup.find(attrs={'class':'sBody'})
appendtag = soup.find(attrs={'class':'sBody'})
for citem in texttag.findAll(attrs={'class':['podStoryRel','bottom-rel','hide']}):
citem.extract()
self.append_page(soup2)

View File

@@ -8,6 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1324038402(BasicNewsRecipe):
title = u'La Gazzetta del Mezzogiorno'
language = 'it'
__author__ = 'faber1971'
description = 'Italian regional magazine - Apulia'
oldest_article = 1

View File

@@ -1,5 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class Polska_times(BasicNewsRecipe):
title = u'Polska Times'
__author__ = 'fenuks'
@@ -27,4 +26,4 @@ class Polska_times(BasicNewsRecipe):
def get_cover_url(self):
    """Return the URL of the cover image, or None when it cannot be found.

    Loads the prasa24.pl listing page through ``self.index_to_soup`` and
    reads the ``src`` attribute of the first ``img`` inside the element
    whose id is ``pojemnik``. When a URL is found it is also stored on
    ``self.cover_url``.

    Returns:
        The image URL taken from the page; otherwise whatever
        ``self.cover_url`` already holds, or None if it was never set.
    """
    soup = self.index_to_soup('http://www.prasa24.pl/gazeta/metropolia-warszawska/')
    container = soup.find(id='pojemnik')
    # ``find`` yields None when the element is absent; probing with getattr
    # keeps a changed page layout from raising instead of meaning "no cover".
    img = getattr(container, 'img', None)
    src = img.get('src') if img is not None else None
    if src:
        self.cover_url = src
    # The fallback must not read self.cover_url itself, otherwise the
    # default expression fails exactly when the attribute is missing.
    return getattr(self, 'cover_url', None)