# Provenance: commit c32b3ed1c965f6ccee020844812d42a2d2a4e2cb
# ("New recipe for Foreign Affairs by kwetal", Kovid Goyal, 2010-01-03).
# Target path: resources/recipes/foreignaffairs.recipe
# The same commit adds the icon resources/images/news/foreignaffairs.png.

import re

from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe


class ForeignAffairsRecipe(BasicNewsRecipe):
    """News recipe for the magazine section of foreignaffairs.com.

    The index is scraped from ``INDEX + '/magazine'``. Each article is then
    downloaded through its "print" link (see ``get_obfuscated_article``).
    When a username and password are configured, ``get_browser`` logs in
    before any page is fetched.
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en'
    version = 1

    title = u'Foreign Affairs (Subcription or (free) Registration)'
    publisher = u'Council on Foreign Relations'
    category = u'USA, Foreign Affairs'
    description = u'The leading forum for serious discussion of American foreign policy and international affairs.'

    no_stylesheets = True
    remove_javascript = True
    needs_subscription = True

    INDEX = 'http://www.foreignaffairs.com'

    remove_tags = [dict(name='base')]
    remove_tags_before = dict(name='h1', attrs={'class': 'print-title'})
    remove_tags_after = dict(name='div', attrs={'class': 'print-footer'})

    extra_css = '''
                body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
                div.print-footer {font-size: x-small; color: #696969;}
                '''

    conversion_options = {'comments': description, 'tags': category, 'language': 'en',
                          'publisher': publisher}

    # Keeps the temporary files created by get_obfuscated_article referenced.
    temp_files = []
    articles_are_obfuscated = True

    # Class attribute of one article row on the magazine page. Compiled once
    # here instead of on every parse_index call. "(?:odd|even)" is a real
    # alternation; the former "[odd|even]" was a character class that only
    # matched by accident.
    _ROW_CLASS = re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-(?:odd|even).*')
    _AUTHORS_CLASS = 'views-field-field-article-display-authors-value'
    _SUMMARY_CLASS = 'views-field-field-article-summary-value'

    def get_obfuscated_article(self, url):
        """Download the print version of the article at *url*.

        Opens *url*, follows the first link whose URL matches
        ``/print/[0-9]+`` and stores the response in a persistent temporary
        file.

        Returns the path of that temporary file.
        """
        br = self.get_browser()
        br.open(url)

        response = br.follow_link(url_regex=r'/print/[0-9]+', nr=0)
        html = response.read()

        tmp = PersistentTemporaryFile('_fa.html')
        try:
            tmp.write(html)
        finally:
            # Close even when the write fails so the handle is not leaked.
            tmp.close()
        self.temp_files.append(tmp)

        return tmp.name

    def _title_link(self, node):
        """Return the ``<a>`` inside *node*'s title field, or None."""
        field = node.find('div', attrs={'class': 'views-field-title'})
        if field is None:
            return None
        return field.find('a')

    def _authors(self, node):
        """Return the author text found in *node*, or '' when absent."""
        tag = node.find('div', attrs={'class': self._AUTHORS_CLASS})
        if tag is None:
            return ''
        return self.tag_to_string(tag)

    def _summary(self, node):
        """Return the summary text found in *node*, or '' when absent."""
        marker = node.find('span', attrs={'class': self._SUMMARY_CLASS})
        if marker is None:
            return ''
        # The summary text is not inside the span but in the <p> that follows
        # it. If they ever fix their markup, this will break :-(
        paragraph = marker.findNextSibling('p')
        if paragraph is None:
            return ''
        return self.tag_to_string(paragraph)

    def _entry(self, link, description):
        """Build the article dict for *link* with the given *description*."""
        return {'title': self.tag_to_string(link), 'date': None,
                'url': self.INDEX + link['href'], 'description': description}

    def parse_index(self):
        """Scrape the magazine page into a list of (section, articles) pairs.

        Returns a list of ``(section title, [article dict, ...])`` tuples with
        a 'Magazine' section and, when the page contains a ``<ul>``, a
        'Letters to the Editor' section. The list is empty when the main
        content container is not found.
        """
        soup = self.index_to_soup(self.INDEX + '/magazine')
        answer = []

        content = soup.find('div', attrs={'class': 'center-wrapper'})
        if content is None:
            # Nothing to scrape; avoid dereferencing a missing container.
            return answer

        articles = []
        for row in content.findAll('div', attrs={'class': self._ROW_CLASS}):
            link = self._title_link(row)
            if link is None:
                continue
            # NOTE(review): the separator is reconstructed from a garbled
            # literal - confirm it was a line break and not '<br />'.
            description = self._authors(row) + '\n' + self._summary(row)
            articles.append(self._entry(link, description))
        answer.append(('Magazine', articles))

        ul = content.find('ul')
        if ul is not None:
            letters = []
            for li in ul.findAll('li'):
                link = self._title_link(li)
                if link is None:
                    continue
                letters.append(self._entry(link, self._authors(li)))
            answer.append(('Letters to the Editor', letters))

        return answer

    def preprocess_html(self, soup):
        """Turn site-relative image sources into absolute URLs.

        Sources that already start with ``http://`` or ``https://`` are left
        untouched; every other source is prefixed with ``INDEX``.

        Returns the modified *soup*.
        """
        for img in soup.findAll('img', attrs={'src': True}):
            if not img['src'].startswith(('http://', 'https://')):
                img['src'] = self.INDEX + img['src']

        return soup

    def get_browser(self):
        """Return a browser, logged in when credentials are configured."""
        # super() binds correctly whether the base get_browser is an instance
        # method or a classmethod; the bare BasicNewsRecipe.get_browser()
        # call fails in the instance-method case.
        br = super(ForeignAffairsRecipe, self).get_browser()
        if self.username is not None and self.password is not None:
            br.open('https://www.foreignaffairs.com/user?destination=home')
            # The login form is the second form on the page.
            br.select_form(nr=1)
            br['name'] = self.username
            br['pass'] = self.password
            br.submit()
        return br