from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup


class JSOnline(BasicNewsRecipe):
    """Recipe for the Milwaukee Journal Sentinel (jsonline.com) RSS feeds.

    Each downloaded article page is reduced to its ``div#mainContent``
    element by :meth:`preprocess_html`.
    """

    title = u'Milwaukee Journal Sentinel'
    language = 'en'
    __author__ = 'Krittika Goyal'
    oldest_article = 2  # days
    max_articles_per_feed = 25

    no_stylesheets = True
    remove_tags_before = dict(name='div', attrs={'id': 'wrapper'})
    remove_tags = [
        dict(name='iframe'),
        dict(name='div', attrs={'class': [
            'right_float', 'headlines', 'side_section_container poll',
            'side_section_container html']}),
    ]

    # (section title, RSS URL) pairs; every feed is served from the same
    # jsonline.com RSS endpoint and differs only in the ``path`` parameter.
    feeds = [
        ('Main Headlines', 'http://www.jsonline.com/rss?c=y&path=%2F'),
        ('Business', 'http://www.jsonline.com/rss?c=y&path=%2Fbusiness'),
        ('Milwaukee marketplace',
         'http://www.jsonline.com/rss?c=y&path=%2Fmarketplace'),
        ('Top Entertainment Stories',
         'http://www.jsonline.com/rss?c=y&path=%2Fentertainment%2Ftopstories'),
        ('Arts and Books',
         'http://www.jsonline.com/rss?c=y&path=%2Fentertainment%2Farts'),
        ('Movies',
         'http://www.jsonline.com/rss?c=y&path=%2Fentertainment%2Fmovies'),
        ('Music and Nightlife',
         'http://www.jsonline.com/rss?c=y&path=%2Fentertainment%2Fmusicandnightlife'),
        ('Dining',
         'http://www.jsonline.com/rss?c=y&path=%2Ffeatures%2Fdining'),
        ('Fashion',
         'http://www.jsonline.com/rss?c=y&path=%2Ffeatures%2Ffashion'),
        ('Health and Fitness',
         'http://www.jsonline.com/rss?c=y&path=%2Ffeatures%2Fhealth'),
        ('Top Metro Stories',
         'http://www.jsonline.com/rss?c=y&path=%2Fnews%2Ftopstories'),
        ('Crime', 'http://www.jsonline.com/rss?c=y&path=%2Fnews%2Fcrime'),
        ('Sports', 'http://www.jsonline.com/rss?c=y&path=%2Fsports'),
    ]

    # def print_version(self, url):
    #     return url+'/0'

    def preprocess_html(self, soup):
        """Return a minimal document containing only the article story.

        The ``div`` with id ``mainContent`` is looked up in *soup* and
        inserted as the first child of the ``body`` of a fresh, empty HTML
        document, which is returned.

        If the page has no such ``div``, *soup* is returned unchanged rather
        than failing on the insert.
        """
        story = soup.find(name='div', attrs={'id': 'mainContent'})
        if story is None:
            # Nothing to extract: keep the page as downloaded.
            return soup

        # An explicit skeleton guarantees that a <body> exists and that it
        # starts out empty, so the story is the only content in the result.
        skeleton = BeautifulSoup(
            '<html><head><title>t</title></head><body></body></html>')
        body = skeleton.find(name='body')
        body.insert(0, story)
        return skeleton