diff --git a/recipes/foreignaffairs.recipe b/recipes/foreignaffairs.recipe
index 69511cbd09..c7fa21b3e9 100644
--- a/recipes/foreignaffairs.recipe
+++ b/recipes/foreignaffairs.recipe
@@ -3,10 +3,17 @@ import re
 from calibre.ptempfile import PersistentTemporaryFile
 
 class ForeignAffairsRecipe(BasicNewsRecipe):
+    '''Modifications to the original recipe:
+    1) fetch the issue cover
+    2) optionally skip premium articles (see INCLUDE_PREMIUM)
+    3) extract proper section names, e.g. "Comments", "Essay"
+
+    by Chen Wei weichen302@gmx.com, 2012-02-05'''
+
     __license__ = 'GPL v3'
     __author__ = 'kwetal'
     language = 'en'
-    version = 1
+    version = 1.01
 
     title = u'Foreign Affairs (Subcription or (free) Registration)'
     publisher = u'Council on Foreign Relations'
@@ -17,6 +24,9 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
     remove_javascript = True
 
     INDEX = 'http://www.foreignaffairs.com'
+    FRONTPAGE = 'http://www.foreignaffairs.com/magazine'
+    INCLUDE_PREMIUM = False  # True: also fetch articles marked premium
+
 
     remove_tags = []
     remove_tags.append(dict(name = 'base'))
@@ -37,6 +47,16 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
     temp_files = []
     articles_are_obfuscated = True
 
+    def get_cover_url(self):
+        '''Return the URL of the issue cover shown on the magazine front
+        page, or None when no cover image is found there.'''
+        soup = self.index_to_soup(self.FRONTPAGE)
+        div = soup.find('div', attrs={'class':'inthemag-issuebuy-cover'})
+        img = div.find('img') if div is not None else None
+        if img is None or not img.get('src'):
+            return None
+        return self.INDEX + img['src']
+
     def get_obfuscated_article(self, url):
         br = self.get_browser()
         br.open(url)
@@ -50,55 +70,45 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
 
         return self.temp_files[-1].name
 
     def parse_index(self):
-        soup = self.index_to_soup('http://www.foreignaffairs.com/magazine')
-        articles = []
+        '''Build the section/article index from the magazine front page.
+
+        The element following each div.panel-separator is treated as one
+        section, titled by its h2 heading.  Articles carrying a premium-icon
+        span are skipped unless INCLUDE_PREMIUM is true.  Returns a list of
+        (section title, list of article dicts) tuples.'''
         answer = []
-        content = soup.find('div', attrs = {'class': 'center-wrapper'})
-        if content:
-            for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
-                tag = div.find('div', attrs = {'class': 'views-field-title'})
-                if tag:
-                    a = tag.find('a')
-                    if a:
-                        title = self.tag_to_string(a)
-                        url = self.INDEX + a['href']
-
-                        author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
-                        tag = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
-                        # If they ever fix their markup, this will break :-(
-                        summary = self.tag_to_string(tag.findNextSibling('p'))
-                        description = author + '<br/>' + summary
-
-                        articles.append({'title': title, 'date': None, 'url': url, 'description': description})
-                    else:
-                        continue
-                else:
-                    continue
-
-            answer.append(('Magazine', articles))
-
-            ul = content.find('ul')
-            if ul:
-                articles = []
-                for li in ul.findAll('li'):
-                    tag = li.find('div', attrs = {'class': 'views-field-title'})
-                    if tag:
-                        a = tag.find('a')
-                        if a:
-                            title = self.tag_to_string(a)
-                            url = self.INDEX + a['href']
-                            description = ''
-                            tag = li.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'})
-                            if tag:
-                                description = self.tag_to_string(tag)
-                            articles.append({'title': title, 'date': None, 'url': url, 'description': description})
-                        else:
-                            continue
-                    else:
-                        continue
-
-                answer.append(('Letters to the Editor', articles))
+        # views-row-(?:odd|even) is an alternation; a [odd|even] character
+        # class would match any single one of those letters instead
+        row_class = re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-(?:odd|even).*')
+        soup = self.index_to_soup(self.FRONTPAGE)
+        for sec in soup.findAll('div', attrs={'class':'panel-separator'}):
+            content = sec.nextSibling
+            # nextSibling may be None or a text node; only tags can be searched
+            if content is None or not hasattr(content, 'findAll'):
+                continue
+            section = self.tag_to_string(content.find('h2'))
+            articles = []
+            rows = content.findAll('div', attrs={'class': row_class}) + content.findAll('li')
+            for row in rows:
+                premium = row.find('span', attrs={'class': 'premium-icon'})
+                if premium is not None and not self.INCLUDE_PREMIUM:
+                    continue
+                tag = row.find('div', attrs={'class': 'views-field-title'})
+                a = tag.find('a') if tag is not None else None
+                if a is None:
+                    continue
+                author = row.find('div', attrs={'class': 'views-field-field-article-display-authors-value'})
+                summary = row.find('span', attrs={'class': 'views-field-field-article-summary-value'})
+                articles.append({
+                    'title': self.tag_to_string(a),
+                    'date': None,
+                    'url': self.INDEX + a['href'],
+                    'description': self.tag_to_string(summary),
+                    'author': self.tag_to_string(author),
+                })
+            if articles:
+                answer.append((section, articles))
         return answer
 
     def preprocess_html(self, soup):