diff --git a/src/libprs500/web/feeds/news.py b/src/libprs500/web/feeds/news.py
index 3b82418f0d..a4d94e5427 100644
--- a/src/libprs500/web/feeds/news.py
+++ b/src/libprs500/web/feeds/news.py
@@ -324,11 +324,9 @@ class BasicNewsRecipe(object):
         self.simultaneous_downloads = 1
         self.navbar = templates.NavBarTemplate()
-        self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine'])
+        self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine', '--header'])
         self.failed_downloads = []
         self.partial_failures = []
-
-
     def _postprocess_html(self, soup):
@@ -347,9 +345,6 @@ class BasicNewsRecipe(object):
        @return: Path to index.html
        @rtype: string
        '''
-        self.report_progress(0, _('Trying to download cover...'))
-
-        self.download_cover()
         res = self.build_index()
         self.cleanup()
         self.report_progress(1, _('Download finished'))
@@ -426,7 +421,9 @@ class BasicNewsRecipe(object):
             self.report_progress(0, _('Got feeds from index page'))
         except NotImplementedError:
             feeds = self.parse_feeds()
-
+
+        self.report_progress(0, _('Trying to download cover...'))
+        self.download_cover()
         if self.test:
             feeds = feeds[:2]
         self.has_single_feed = len(feeds) == 1
diff --git a/src/libprs500/web/feeds/recipes/atlantic.py b/src/libprs500/web/feeds/recipes/atlantic.py
index 6632e83e12..7cea7fa510 100644
--- a/src/libprs500/web/feeds/recipes/atlantic.py
+++ b/src/libprs500/web/feeds/recipes/atlantic.py
@@ -39,6 +39,12 @@ class TheAtlantic(BasicNewsRecipe):
         issue = soup.find('span', attrs={'class':'issue'})
         if issue:
             self.timefmt = ' [%s]'%self.tag_to_string(issue).rpartition('|')[-1].strip().replace('/', '-')
+
+        cover = soup.find('img', alt='feature image', src=True)
+        if cover is not None:
+            self.cover_url = 'http://theatlantic.com'+cover['src']
+        else:
+            raise ValueError('Could not find the cover image for this issue')
 
         for item in soup.findAll('div', attrs={'class':'item'}):
             a = item.find('a')
diff --git a/src/libprs500/web/feeds/recipes/economist.py b/src/libprs500/web/feeds/recipes/economist.py
index 33407fa04a..c1b6a46974 100644
--- a/src/libprs500/web/feeds/recipes/economist.py
+++ b/src/libprs500/web/feeds/recipes/economist.py
@@ -20,6 +20,9 @@ economist.com
 
 from libprs500.web.feeds.news import BasicNewsRecipe
 from libprs500.ebooks.BeautifulSoup import BeautifulSoup
+import mechanize
+from urllib2 import quote
+
 
 class Economist(BasicNewsRecipe):
     title = 'The Economist'
@@ -28,6 +31,16 @@
     remove_tags = [dict(name=['script', 'noscript', 'title'])]
     remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
 
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        if self.username is not None and self.password is not None:
+            req = mechanize.Request('http://www.economist.com/members/members.cfm?act=exec_login', headers={'Referer':'http://www.economist.com'})
+            data = 'logging_in=Y&returnURL=http%253A%2F%2Fwww.economist.com%2Findex.cfm&email_address=username&pword=password&x=7&y=11'
+            data = data.replace('username', quote(self.username)).replace('password', quote(self.password))
+            req.add_data(data)
+            br.open(req).read()
+        return br
+
     def parse_index(self):
         soup = BeautifulSoup(self.browser.open(self.INDEX).read(),
                              convertEntities=BeautifulSoup.HTML_ENTITIES)
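
The get_browser() override added to the Economist recipe authenticates by POSTing a pre-built, URL-encoded form body to the members login endpoint and letting mechanize keep the resulting session cookie. Below is a minimal standalone sketch of that same pattern for Python 2, assuming the same mechanize package the recipe imports; the function name economist_login and the bare mechanize.Browser() are illustrative assumptions, while the endpoint, Referer header, form fields, and quoting are taken from the patch above.

import mechanize
from urllib2 import quote

def economist_login(username, password):
    # Hypothetical standalone helper; the recipe does this inside get_browser().
    br = mechanize.Browser()
    # The login endpoint is sent a Referer header along with the form POST.
    req = mechanize.Request(
        'http://www.economist.com/members/members.cfm?act=exec_login',
        headers={'Referer': 'http://www.economist.com'})
    # Template form body; the literal 'username' and 'password' placeholders
    # are swapped for URL-quoted credentials before the request is sent.
    data = ('logging_in=Y&returnURL=http%253A%2F%2Fwww.economist.com%2Findex.cfm'
            '&email_address=username&pword=password&x=7&y=11')
    data = data.replace('username', quote(username)).replace('password', quote(password))
    req.add_data(data)   # attaching a body turns the request into a POST
    br.open(req).read()  # session cookies are retained by the Browser object
    return br

In the recipe itself this logic only runs when both username and password are set, and the recipe framework, not the user, is expected to call get_browser().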