from calibre import browser
from calibre.web.feeds.news import BasicNewsRecipe
import re


class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    """calibre news recipe for Countryfile.com.

    Articles come from the two feed43.com feeds listed in ``feeds``; the
    cover is looked up on the site's magazine page by ``get_cover_url``.
    """

    title = u'Countryfile.com'
    __author__ = 'Dave Asbury'
    description = 'The official website of Countryfile Magazine'
    # last updated 24.10.14
    language = 'en_GB'

    # Recipe options; see calibre's BasicNewsRecipe documentation for the
    # exact semantics of each setting.
    oldest_article = 30
    max_articles_per_feed = 25
    remove_empty_feeds = True
    no_stylesheets = True
    auto_cleanup = True
    compress_news_images = True
    ignore_duplicate_articles = {'title', 'url'}

    def get_cover_url(self):
        """Return the URL to use for the cover image.

        Fetches the magazine page, finds the first element whose class
        matches ``imagecache imagecache-250px`` and takes its ``src``
        attribute. That URL is then probed with redirects disabled; if no
        usable ``src`` is found, or the probe raises, a fixed fallback
        cover URL is returned instead.
        """
        fallback_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'

        soup = self.index_to_soup('http://www.countryfile.com/magazine')
        cover_tag = soup.find(attrs={'class': re.compile(
            'imagecache imagecache-250px')})

        # Read the attribute directly rather than slicing the tag's string
        # form, which only works for one exact markup length.
        cover_src = cover_tag.get('src') if cover_tag is not None else None
        if not cover_src:
            return fallback_url

        br = browser()
        br.set_handle_redirect(False)
        try:
            # Probe only: the response is discarded, we just need to know
            # whether the URL can be opened without error.
            br.open_novisit(cover_src)
        except Exception:
            # Best effort: any failure to open the scraped URL means we use
            # the known fallback cover instead of failing the recipe.
            return fallback_url
        return cover_src

    # Strip the " | Countryfile.com" suffix wherever it appears in the
    # downloaded HTML.
    preprocess_regexps = [
        (re.compile(r' \| Countryfile.com', re.IGNORECASE | re.DOTALL),
         lambda match: '')]

    feeds = [
        (u'Country News', u'http://www.feed43.com/7204505705648666.xml'),
        (u'Articles', u'http://www.feed43.com/8542080013204443.xml'),
    ]