diff --git a/resources/images/news/wapo_cartoons.png b/resources/images/news/wapo_cartoons.png new file mode 100644 index 0000000000..5722cf5416 Binary files /dev/null and b/resources/images/news/wapo_cartoons.png differ diff --git a/resources/recipes/wapo_cartoons.recipe b/resources/recipes/wapo_cartoons.recipe new file mode 100644 index 0000000000..78440aa140 --- /dev/null +++ b/resources/recipes/wapo_cartoons.recipe @@ -0,0 +1,145 @@ +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup +from datetime import date, timedelta + +class WaPoCartoonsRecipe(BasicNewsRecipe): + __license__ = 'GPL v3' + __author__ = 'kwetal' + language = 'en' + version = 2 + + title = u'Washington Post Cartoons' + publisher = u'Washington Post' + category = u'News, Cartoons' + description = u'Cartoons from the Washington Post' + + oldest_article = 7 + max_articles_per_feed = 100 + use_embedded_content = False + no_stylesheets = True + + feeds = [] + feeds.append((u'Anderson', u'http://www.uclick.com/client/wpc/wpnan/')) + feeds.append((u'Auth', u'http://www.uclick.com/client/wpc/ta/')) + feeds.append((u'Bok', u'http://www.creators.com/featurepages/11_editorialcartoons_chip-bok.html?name=cb')) + feeds.append((u'Carlson', u'http://www.uclick.com/client/wpc/sc/')) + feeds.append((u'Luckovich', u'http://www.creators.com/featurepages/11_editorialcartoons_mike-luckovich.html?name=lk')) + feeds.append((u'McCoy', u'http://www.uclick.com/client/wpc/gm/')) + feeds.append((u'Pat Oliphant', u'http://www.uclick.com/client/wpc/po/')) + feeds.append((u'Sargent', u'http://wpcomics.washingtonpost.com/client/wpc/bs/')) + feeds.append((u'Wilkinson', u'http://www.uclick.com/client/wpc/wpswi/')) + + extra_css = ''' + body {font-family: verdana, arial, helvetica, geneva, sans-serif;} + h1 {font-size: medium; font-weight: bold; margin-bottom: -0.1em; padding: 0em; text-align: left;} + #name {margin-bottom: 0.2em} + #copyright {font-size: xx-small; color: #696969; text-align: right; margin-top: 0.2em;} + ''' + + def parse_index(self): + index = [] + oldestDate = date.today() - timedelta(days = self.oldest_article) + oldest = oldestDate.strftime('%Y%m%d') + for feed in self.feeds: + cartoons = [] + soup = self.index_to_soup(feed[1]) + + cartoon = {'title': 'Current', 'date': None, 'url': feed[1], 'description' : ''} + cartoons.append(cartoon) + + select = soup.find('select', attrs = {'name': ['url', 'dest']}) + if select: + cartoonCandidates = [] + if select['name'] == 'url': + cartoonCandidates = self.cartoonCandidatesWaPo(select, oldest) + else: + cartoonCandidates = self.cartoonCandidatesCreatorsCom(select, oldest) + + for cartoon in cartoonCandidates: + cartoons.append(cartoon) + + index.append([feed[0], cartoons]) + + return index + + def preprocess_html(self, soup): + freshSoup = self.getFreshSoup(soup) + + div = soup.find('div', attrs = {'id': 'name'}) + if div: + freshSoup.body.append(div) + comic = soup.find('div', attrs = {'id': 'comic_full'}) + + img = comic.find('img') + if '&' in img['src']: + img['src'], sep, bad = img['src'].rpartition('&') + + freshSoup.body.append(comic) + freshSoup.body.append(soup.find('div', attrs = {'id': 'copyright'})) + else: + span = soup.find('span', attrs = {'class': 'title'}) + if span: + del span['class'] + span['id'] = 'name' + span.name = 'div' + freshSoup.body.append(span) + + img = soup.find('img', attrs = {'class': 'pic_big'}) + if img: + td = img.parent + if td.has_key('style'): + del td['style'] + td.name = 'div' + td['id'] = 'comic_full' + freshSoup.body.append(td) + + td = soup.find('td', attrs = {'class': 'copy'}) + if td: + for a in td.find('a'): + a.extract() + del td['class'] + td['id'] = 'copyright' + td.name = 'div' + freshSoup.body.append(td) + + return freshSoup + + def getFreshSoup(self, oldSoup): + freshSoup = BeautifulSoup('