calibre/recipes/birmingham_post.recipe
2013-04-04 20:08:42 +05:30

55 lines
2.0 KiB
Plaintext

from calibre.web.feeds.news import BasicNewsRecipe
import re
import mechanize
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    """Calibre news recipe for the Birmingham Post (Birmingham, UK).

    Articles come from the RSS feeds listed in ``feeds``. The cover is
    looked up on the paper's home page at download time, and the static
    ``cover_url`` below is used whenever that lookup fails.
    """

    title = u'Birmingham post'
    description = 'Author D.Asbury. News for Birmingham UK'
    __author__ = 'Dave Asbury'
    language = 'en_GB'

    # Static cover image; also the fallback returned by get_cover_url().
    cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
    masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif'

    oldest_article = 2
    max_articles_per_feed = 20
    linearize_tables = True
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = True
    compress_news_images = True

    def get_cover_url(self):
        """Return the URL of the cover image to use for this download.

        The home page is searched for an element whose ``height``
        attribute contains ``3`` and whose ``alt`` attribute contains
        ``Post``. Its ``src`` is returned if it can be opened without
        following redirects. In every other case (home page unreachable,
        no matching element, no ``src``, or the image URL cannot be
        opened) the class-level ``cover_url`` is returned instead, so this
        method never raises because of a site problem.
        """
        fallback = self.cover_url

        try:
            soup = self.index_to_soup('http://www.birminghampost.net')
        except Exception as err:
            self.log.warn('Could not fetch home page for cover (%s); '
                          'using default cover' % err)
            return fallback

        # Look for the image element carrying the paper's cover/logo.
        cover_tag = soup.find(attrs={'height': re.compile('3'),
                                     'alt': re.compile('Post')})
        src = cover_tag.get('src') if cover_tag is not None else None
        if not src:
            self.log.warn('No cover image found on home page; '
                          'using default cover')
            return fallback

        candidate = str(src)
        self.log.debug('Cover candidate: %s' % candidate)

        # Probe the candidate with redirects disabled: a URL that
        # redirects or errors raises here and is rejected.
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        try:
            br.open_novisit(candidate)
        except Exception as err:
            self.log.warn('Cover %s is not reachable (%s); '
                          'using default cover' % (candidate, err))
            return fallback
        return candidate

    feeds = [
        # Disabled in the original recipe:
        # (u'News', u'http://www.birminghampost.net/news/rss.xml'),
        (u'West Mids. News', u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
        (u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'),
        (u'Sports', u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'),
        (u'Bloggs & Comments', u'http://www.birminghampost.net/comment/rss.xml'),
    ]