from calibre.web.feeds.news import BasicNewsRecipe


class FIELDSTREAM(BasicNewsRecipe):
    """Recipe that builds a Field and Stream issue from the site's blog sections.

    The section list is hard-coded in ``parse_index``; each section page is
    scraped for ``<h2>`` headings that contain a link to an article.
    """

    title = 'Field and Stream'
    __author__ = 'Starson17 and Tonythebookworm'
    description = 'Hunting and Fishing and Gun Talk'
    language = 'en'
    no_stylesheets = True
    publisher = 'Starson17 and Tonythebookworm'
    category = 'food recipes, hunting, fishing, guns'
    use_embedded_content = False
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    cover_url = 'http://www.arrowheadflyangler.com/Portals/1/Articles/FieldStream/Field%20and%20Stream%20March%20Fishing%20Edition%20Article%20Cover.jpg'  # noqa
    max_articles_per_feed = 10

    # Site root, prepended to site-relative article links found on index pages.
    INDEX = 'http://www.fieldandstream.com'

    # Keep only the article body container.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['article-wrapper']}),
    ]

    # Drop divs whose class list contains one of these tokens. The lambdas
    # guard against a missing class attribute (x is None) before splitting.
    remove_tags = [
        dict(name='div', attrs={
            'class': lambda x: x and 'content-main-bottom' in x.split()}),
        dict(name='div', attrs={
            'class': lambda x: x and 'pw-widget' in x.split()}),
    ]

    def preprocess_html(self, soup):
        """Fix up lazily-loaded images and strip form containers.

        Every ``<img>`` carrying a ``data-src`` attribute gets that value
        copied into ``src``, and the parent element of every ``<form>`` is
        removed from the tree. Returns the same (mutated) soup.
        """
        for img in soup.findAll('img', attrs={'data-src': True}):
            img['src'] = img['data-src']
        for form in soup.findAll('form'):
            # Remove the enclosing element, not just the form itself.
            form.parent.extract()
        return soup

    def parse_index(self):
        """Return a list of ``(section title, articles)`` tuples.

        Sections that yield no articles are skipped. When ``self.test`` is
        set, its first element caps how many sections are collected;
        otherwise the cap is 100, which the fixed list never reaches.
        """
        feeds = []
        num = self.test[0] if self.test else 100
        for title, url in [
            ('Field Test', 'http://www.fieldandstream.com/blogs/field-test'),
            (u"Wild Chef", u"http://www.fieldandstream.com/blogs/wild-chef"),
            (u"The Gun Nuts", u"http://www.fieldandstream.com/blogs/gun-nut"),
            (u"Whitetail 365", u"http://www.fieldandstream.com/blogs/whitetail-365"),
            ('Field Notes', 'http://www.fieldandstream.com/blogs/field-notes'),
            (u"Fly Talk", u"http://www.fieldandstream.com/blogs/flytalk"),
            (u"The Conservationist", u"http://www.fieldandstream.com/blogs/conservationist"),
            ('The Lateral Line', 'http://www.fieldandstream.com/blogs/lateral-line'),
            ('Total Outdoorsman', 'http://www.fieldandstream.com/blogs/total-outdoorsman'),
            ('A Sportsman\'s Life', 'http://www.fieldandstream.com/blogs/a-sportsmans-life'),
        ]:
            self.log('Section:', title)
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
            # Stop as soon as the requested number of sections is reached;
            # the previous `>` comparison fetched one section too many.
            if len(feeds) >= num:
                break
        return feeds

    def make_links(self, url):
        """Collect the articles linked from the section page at ``url``.

        Scans every ``<h2>`` on the page and uses the first ``<a>`` inside
        it. Returns a list of dicts with the keys ``title``, ``url``,
        ``description`` and ``date`` (the last two are always empty strings).
        """
        current_articles = []
        soup = self.index_to_soup(url)
        for item in soup.findAll('h2'):
            link = item.find('a')
            if link is None:
                continue
            href = link.get('href')
            if not href:
                # Anchor without a target (e.g. a named anchor): nothing to fetch.
                continue
            if href.startswith(('http://', 'https://')):
                # Already absolute; prefixing INDEX would corrupt the URL.
                article_url = href
            elif href.startswith('/'):
                article_url = self.INDEX + href
            else:
                article_url = self.INDEX + '/' + href
            title = self.tag_to_string(link)
            self.log('\t', title, 'at', article_url)
            current_articles.append(
                {'title': title, 'url': article_url, 'description': '', 'date': ''})
        return current_articles