2025-07-09 03:04:10 -04:00 · 2008-03-15 22:23:22 +00:00 · 2008-03-15 22:23:22 +00:00 · 3268180cad
commit 3268180cad
parent 5a76f5c2e1
3 changed files with 23 additions and 7 deletions
--- a/src/libprs500/web/feeds/news.py
+++ b/src/libprs500/web/feeds/news.py
@ -324,11 +324,9 @@ class BasicNewsRecipe(object):
            self.simultaneous_downloads = 1
            
        self.navbar = templates.NavBarTemplate()
-        self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine'])
+        self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine', '--header'])
        self.failed_downloads = []
        self.partial_failures = []
-        
-        
                
            
    def _postprocess_html(self, soup):
@ -347,9 +345,6 @@ class BasicNewsRecipe(object):
        @return: Path to index.html
        @rtype: string
        '''
-        self.report_progress(0, _('Trying to download cover...'))
-        
-        self.download_cover()
        res = self.build_index()
        self.cleanup()
        self.report_progress(1, _('Download finished'))
@ -426,7 +421,9 @@ class BasicNewsRecipe(object):
            self.report_progress(0, _('Got feeds from index page'))
        except NotImplementedError:
            feeds = self.parse_feeds()
-            
+        
+        self.report_progress(0, _('Trying to download cover...'))
+        self.download_cover()    
        if self.test:
            feeds = feeds[:2]
        self.has_single_feed = len(feeds) == 1
--- a/src/libprs500/web/feeds/recipes/atlantic.py
+++ b/src/libprs500/web/feeds/recipes/atlantic.py
@ -39,6 +39,12 @@ class TheAtlantic(BasicNewsRecipe):
        issue = soup.find('span', attrs={'class':'issue'})
        if issue:
            self.timefmt = ' [%s]'%self.tag_to_string(issue).rpartition('|')[-1].strip().replace('/', '-')
+            
+        # Look for the image whose alt text is 'feature image' and use it as
+        # the cover. NOTE(review): the host is prepended on the assumption
+        # that 'src' is a site-relative path -- confirm against the live page.
+        cover = soup.find('img', alt='feature image', src=True)
+        if cover is not None:
+            self.cover_url = 'http://theatlantic.com'+cover['src']
+        else:
+            # Raise a real exception instead of a bare string so the failure
+            # is reported with a meaningful message.
+            raise ValueError('Could not find the cover image on the index page')
        
        for item in soup.findAll('div', attrs={'class':'item'}):
            a = item.find('a')
--- a/src/libprs500/web/feeds/recipes/economist.py
+++ b/src/libprs500/web/feeds/recipes/economist.py
@ -20,6 +20,9 @@ economist.com
 from libprs500.web.feeds.news import BasicNewsRecipe
 from libprs500.ebooks.BeautifulSoup import BeautifulSoup

+import mechanize
+from urllib2 import quote
+
 class Economist(BasicNewsRecipe):
    
    title = 'The Economist'
@ -28,6 +31,16 @@ class Economist(BasicNewsRecipe):
    remove_tags = [dict(name=['script', 'noscript', 'title'])]
    remove_tags_before = dict(name=lambda tag: tag.name=='title' and tag.parent.name=='body')
    
+    def get_browser(self):
+        '''
+        Return the browser from L{BasicNewsRecipe.get_browser}. When both
+        C{self.username} and C{self.password} are set, first submit them to
+        the economist.com login form through that browser.
+        @return: The browser object created by the base class
+        '''
+        br = BasicNewsRecipe.get_browser(self)
+        if self.username is not None and self.password is not None:
+            req = mechanize.Request('http://www.economist.com/members/members.cfm?act=exec_login', headers={'Referer':'http://www.economist.com'})
+            # Build the form body by concatenation rather than chained
+            # str.replace() calls, so a username that happens to contain the
+            # text 'password' is not rewritten by the second substitution.
+            data = ('logging_in=Y&returnURL=http%253A%2F%2Fwww.economist.com%2Findex.cfm'
+                    + '&email_address=' + quote(self.username)
+                    + '&pword=' + quote(self.password)
+                    + '&x=7&y=11')
+            # The form body must actually be attached to the request;
+            # add_data() without an argument raises TypeError.
+            req.add_data(data)
+            br.open(req).read()
+        return br
+    
    def parse_index(self):
        soup = BeautifulSoup(self.browser.open(self.INDEX).read(), 
                             convertEntities=BeautifulSoup.HTML_ENTITIES)