# calibre news recipe: recipes/grid_to.recipe
from calibre.web.feeds.news import BasicNewsRecipe


class TheGridTO(BasicNewsRecipe):
    """Recipe that downloads the latest issue of The Grid TO.

    The home page (``INDEX``) is used to find both the cover image and the
    link to the latest issue; the issue page is then split into the
    ``city``, ``life`` and ``culture`` feeds.
    """

    #: The title to use for the ebook
    title = u'The Grid TO'

    #: A couple of lines that describe the content this recipe downloads.
    #: This will be used primarily in a GUI that presents a list of recipes.
    description = (u'The Grid is a weekly city magazine and daily website providing a fresh, '
                   'accessible voice for Toronto.')

    #: The author of this recipe
    __author__ = u'Yusuf W'

    #: The language that the news is in. Must be an ISO-639 code either
    #: two or three characters long
    language = 'en_CA'

    #: Publication type
    #: Set to newspaper, magazine or blog
    publication_type = 'newspaper'

    #: Convenient flag to disable loading of stylesheets for websites
    #: that have overly complex stylesheets unsuitable for conversion
    #: to ebook formats.
    #: If True stylesheets are not downloaded and processed
    no_stylesheets = True

    #: Everything before and after the ``div#content`` element is dropped.
    remove_tags_before = dict(name='div', id='content')
    remove_tags_after = dict(name='div', id='content')

    #: List of tags to be removed. Specified tags are removed from downloaded HTML.
    remove_tags = [
        dict(name='div', attrs={'class': 'right-content pull-right'}),
        dict(name='div', attrs={'class': 'right-content'}),
        dict(name='div', attrs={'class': 'ftr-line'}),
        dict(name='div', attrs={'class': 'pull-right'}),
        dict(name='div', id='comments'),
        dict(name='div', id='tags'),
    ]

    #: Keep only the specified tags and their children.
    #keep_only_tags = [dict(name='div', id='content')]

    cover_margins = (0, 0, '#ffffff')

    #: Home page of the site; never rebound at runtime.
    INDEX = 'http://www.thegridto.com'

    def get_cover_url(self):
        """Return the cover image URL from the home page.

        Returns:
            The ``src`` of the first ``<img>`` inside the element whose class
            is ``article-block latest-issue``, or ``None`` when that element
            or its image is missing.
        """
        soup = self.index_to_soup(self.INDEX)
        issue_block = soup.find(attrs={'class': 'article-block latest-issue'})
        if issue_block is None:
            return None
        img = issue_block.find('img')
        if img is None:
            return None
        # .get() so an <img> without a src attribute also yields None.
        return img.get('src')

    def parse_index(self):
        """Build the feed list for the latest issue.

        Returns:
            A list of ``(section, articles)`` tuples, one for each section
            container found on the issue page. Every article is a dict with
            ``title``, ``url``, ``description`` and ``date`` keys.

        Raises:
            ValueError: If the link to the latest issue cannot be found on
                the home page.
        """
        # Get the latest issue: it is the third link of the footer block.
        home = self.index_to_soup(self.INDEX)
        footer = home.find('div', attrs={'class': 'full-content stuff-ftr'})
        footer_links = footer.findAll('a') if footer is not None else []
        if len(footer_links) < 3:
            raise ValueError('Latest issue link not found on ' + self.INDEX)

        # Kept in a local: rebinding self.INDEX here would make every later
        # get_cover_url() call read the issue page instead of the home page.
        issue_url = self.INDEX + footer_links[2]['href']

        # Parse the index of the latest issue
        issue = self.index_to_soup(issue_url)

        feeds = []
        for section in ('city', 'life', 'culture'):
            section_class = 'left-content article-listing ' + section + ' pull-left'
            container = issue.find(attrs={'class': section_class})
            if container is None:
                # One missing section should not abort the whole download.
                self.log.warn('Section not found, skipping: ' + section)
                continue

            articles = []
            # NOTE(review): findAllNext() matches every 'search-block' that
            # appears after the container in document order, not only its
            # descendants -- confirm sections do not bleed into each other.
            for block in container.findAllNext(attrs={'class': 'search-block'}):
                links = block.findAll('a', href=True)
                if len(links) < 2:
                    # The article link is the second anchor; skip blocks
                    # that do not have one.
                    continue
                link = links[1]
                articles.append({
                    'title': self.tag_to_string(link),
                    'url': link['href'],
                    'description': '',
                    'date': '',
                })

            feeds.append((section, articles))
        return feeds