From 6027e401d7b4b6ce33146bc5ac99a3fa28c8a6dc Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 24 Jan 2010 18:36:48 -0700
Subject: [PATCH] Improve recipe for Journal of Nephrology

---
 resources/recipes/kidney.recipe | 66 +++++++++++++++++++++++++++------
 1 file changed, 54 insertions(+), 12 deletions(-)

diff --git a/resources/recipes/kidney.recipe b/resources/recipes/kidney.recipe
index e3c75072ee..15bc5f59ed 100644
--- a/resources/recipes/kidney.recipe
+++ b/resources/recipes/kidney.recipe
@@ -15,13 +15,13 @@ class JASN(BasicNewsRecipe):
     remove_tags_before = dict(name='h2')
     #remove_tags_after  = dict(name='th', attrs={'align':'left'})
     remove_tags = [
-       dict(name='iframe'),
+        dict(name='iframe'),
        #dict(name='div', attrs={'class':'related-articles'}),
-       dict(name='td', attrs={'id':['jasnFooter']}),
-       dict(name='table', attrs={'id':"jasnNavBar"}),
-       dict(name='table', attrs={'class':'content_box_outer_table'}),
-       dict(name='th', attrs={'align':'left'})
-    ]
+        dict(name='td', attrs={'id':['jasnFooter']}),
+        dict(name='table', attrs={'id':"jasnNavBar"}),
+        dict(name='table', attrs={'class':'content_box_outer_table'}),
+        dict(name='th', attrs={'align':'left'})
+       ]
 
 
 
@@ -45,12 +45,54 @@ class JASN(BasicNewsRecipe):
             raise ValueError('Failed to log in, is your account expired?')
         return br
 
-    feeds          = [
-        ('JASN',
-        'http://jasn.asnjournals.org/rss/current.xml'),
-    ]
+    #feeds          = [
+        #('JASN',
+        #'http://jasn.asnjournals.org/rss/current.xml'),
+    #]
 
 
+    # Fetch the article TOC page of the current issue
+    def jasn_get_index(self):  # result is searched for id='tocBody' by parse_index()
+        return self.index_to_soup('http://jasn.asnjournals.org/current.shtml')
+
+    # To parse article toc
+    def parse_index(self):
+        '''Build the feed list from the current-issue TOC page.
+
+        Returns a list of (section title, [article dict, ...]) tuples in
+        page order; sections lacking a title or any '/full/' link are dropped.
+        Raises ValueError when the page has no element with id="tocBody".
+        '''
+        toc = self.jasn_get_index().find(id='tocBody')
+        if toc is None:
+            raise ValueError('Could not find the table of contents (tocBody)')
+        feeds, section, articles = [], None, []
+        for tag in toc.findAll(True):
+            if tag.name == 'h2':
+                # A new heading closes the previous section, if it had articles
+                if articles and section:
+                    feeds.append((section, articles))
+                section, articles = self.tag_to_string(tag), []
+                self.log('\tFound section:', section)
+            if section is not None and tag.name == 'strong':
+                # <strong> holds the title; the full-text link is looked up
+                # two levels above it
+                title = self.tag_to_string(tag)
+                a = tag.parent.parent.find('a', href=lambda h: h and '/full/' in h)
+                url = a.get('href', False) if a is not None else None
+                if not url or not title:
+                    continue
+                if url.startswith('/'):
+                    url = 'http://jasn.asnjournals.org'+url
+                self.log('\t\tFound article:', title)
+                self.log('\t\t\t', url)
+                articles.append({'title': title, 'url':url,
+                    'description':'', 'date':''})
+        if articles and section:
+            feeds.append((section, articles))
+        return feeds
+            
+
 
     def preprocess_html(self, soup):
         for a in soup.findAll(text=lambda x: x and '[in this window]' in x):
@@ -59,7 +101,7 @@ class JASN(BasicNewsRecipe):
             if not url:
                 continue
             if url.startswith('/'):
-                url = 'http://jasn.asnjournals.org/'+url
+                url = 'http://jasn.asnjournals.org'+url
                 isoup = self.index_to_soup(url)
                 img = isoup.find('img', src=lambda x: x and
                 x.startswith('/content/'))
@@ -70,4 +112,4 @@ class JASN(BasicNewsRecipe):
         return soup
 
 
-
+    
\ No newline at end of file