from __future__ import print_function from calibre.web.feeds.news import BasicNewsRecipe import re import mechanize class AdvancedUserRecipe1306097511(BasicNewsRecipe): title = u'Birmingham post' description = 'Author D.Asbury. News for Birmingham UK' __author__ = 'Dave Asbury' cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg' oldest_article = 2 max_articles_per_feed = 20 linearize_tables = True remove_empty_feeds = True remove_javascript = True no_stylesheets = True auto_cleanup = True language = 'en_GB' compress_news_images = True cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg' masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif' def get_cover_url(self): soup = self.index_to_soup('http://www.birminghampost.net') # look for the block containing the sun button and url cov = soup.find(attrs={'height': re.compile( '3'), 'alt': re.compile('Post')}) print() print('%%%%%%%%%%%%%%%', cov) print() cov2 = str(cov['src']) print('88888888 ', cov2, ' 888888888888') # cover_url=cov2 # return cover_url br = mechanize.Browser() br.set_handle_redirect(False) try: br.open_novisit(cov2) cover_url = cov2 except: cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg' return cover_url feeds = [ (u'West Mids. News', u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'), (u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'), (u'Sports', u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'), (u'Bloggs & Comments', u'http://www.birminghampost.net/comment/rss.xml') ]