#!/usr/bin/env python2
# -*- coding: utf-8 -*-

__license__ = 'GPL v3'

from calibre.web.feeds.news import BasicNewsRecipe
from calibre import browser


class NewYorker(BasicNewsRecipe):
    """Recipe that builds an issue from the New Yorker magazine index page.

    The recipe object also stands in for its own browser (see
    ``get_browser``) so that every request is made without cookies.
    """

    title = u'New Yorker Magazine'
    description = u'Content from the New Yorker website'

    masthead_url = 'http://www.newyorker.com/images/elements/print/newyorker_printlogo.gif'

    compress_news_images = True
    compress_news_images_auto_size = 8
    scale_news_images_to_device = False
    scale_news_images = (768, 1024)

    url_list = []
    language = 'en'
    __author__ = 'Krittika Goyal'
    no_stylesheets = True
    auto_cleanup = True
    timefmt = ' [%b %d]'
    encoding = 'utf-8'
    extra_css = '''
                .byline { font-size:xx-small; font-weight: bold;}
                h3 { margin-bottom: 6px; }
                .caption { font-size: xx-small; font-style: italic; font-weight: normal; }
                '''
    needs_subscription = 'optional'

    def parse_index(self):
        """Scrape the magazine index page into a list of feeds.

        Every ``h5`` element starts a new section; every ``article`` element
        whose ``itemtype`` is ``http://schema.org/Article`` becomes an entry
        of the section currently open. Articles found before the first
        ``h5`` are filed under 'Current Issue'. As a side effect,
        ``self.cover_url`` is set when a cover image is found.

        Returns:
            A list of ``(section_title, articles)`` tuples, where
            ``articles`` is a list of dicts with the keys ``title``,
            ``url``, ``date`` and ``description``.
        """
        soup = self.index_to_soup('http://www.newyorker.com/magazine?intcid=magazine')

        ph = soup.find('div', attrs={'class': lambda x: x and 'cover-info' in x.split()})
        if ph is not None:
            img = ph.find('img', src=True)
            if img is not None:
                # Drop the '-320' size suffix from the thumbnail URL.
                self.cover_url = img['src'].replace('-320.jpg', '.jpg')

        articles = []
        current_section = 'Current Issue'
        feeds = []
        for story in soup.findAll(['h5', 'article']):
            if story.name == 'h5':
                # A heading closes the section collected so far.
                if articles:
                    feeds.append((current_section, articles))
                current_section, articles = self.tag_to_string(story), []
                self.log('\nFound section: ' + current_section)
                continue

            # .get() so that an <article> without an itemtype attribute is
            # skipped instead of raising KeyError.
            if story.get('itemtype') != 'http://schema.org/Article':
                continue

            h2 = story.find('h2')
            link = h2.find('a', href=True) if h2 is not None else None
            if link is None:
                # Without a headline link there is nothing to download.
                self.log('Skipping article without a headline link')
                continue
            url = link['href']
            title = self.tag_to_string(h2.find('a'))

            # The description is the optional <h3> and <p> siblings of the
            # headline; the separator is only used when both are present.
            desc_parts = []
            h3 = h2.findNextSibling('h3')
            if h3 is not None:
                desc_parts.append(self.tag_to_string(h3))
            p = h2.findNextSibling('p')
            if p is not None:
                desc_parts.append(self.tag_to_string(p))
            desc = '. \n'.join(desc_parts)

            self.log('Found article:', title)
            self.log('\t', url)
            self.log('\t', desc)
            articles.append({'title': title, 'url': url, 'date': '',
                             'description': desc})

        # The last section has no following <h5> to close it, so add it here.
        if articles:
            feeds.append((current_section, articles))

        return feeds

    # The New Yorker changes the content it delivers based on cookies, so the
    # following ensures that we send no cookies: the recipe itself is handed
    # out as the "browser", and each open call goes through a brand-new
    # browser object.
    def get_browser(self, *args, **kwargs):
        """Return the recipe itself as the browser; arguments are ignored."""
        return self

    def clone_browser(self, *args, **kwargs):
        """Return the same object as ``get_browser``; arguments are ignored."""
        return self.get_browser()

    def open_novisit(self, *args, **kwargs):
        """Open a URL with a newly created browser and return its response.

        All arguments are forwarded to the new browser's ``open_novisit``.
        """
        br = browser()
        return br.open_novisit(*args, **kwargs)

    # Plain open() behaves exactly like open_novisit().
    open = open_novisit