Improve recipe for Journal of Nephrology

2025-08-30 23:00:21 -04:00 · 2010-01-24 18:36:48 -07:00 · 2010-01-24 18:36:48 -07:00 · 6027e401d7
commit 6027e401d7
parent 1b4ec12d6d
1 changed files with 54 additions and 12 deletions
--- a/resources/recipes/kidney.recipe
+++ b/resources/recipes/kidney.recipe
@ -45,10 +45,52 @@ class JASN(BasicNewsRecipe):
            raise ValueError('Failed to log in, is your account expired?')
        return br

-    feeds          = [
-        ('JASN',
-        'http://jasn.asnjournals.org/rss/current.xml'),
-    ]
+    #feeds          = [
+        #('JASN',
+        #'http://jasn.asnjournals.org/rss/current.xml'),
+    #]
+
+
+    # Fetch the current-issue TOC page
+    def jasn_get_index(self):
+        return self.index_to_soup('http://jasn.asnjournals.org/current.shtml')
+
+    # Parse the article TOC into (section, articles) feeds
+    def parse_index(self):
+            parse_soup = self.jasn_get_index()
+            
+            div = parse_soup.find(id='tocBody')
+
+            current_section = None
+            current_articles = []
+            feeds = []
+            for x in div.findAll(True):
+                if x.name == 'h2':
+                    # Section heading found
+                    if current_articles and current_section:
+                        feeds.append((current_section, current_articles))
+                    current_section = self.tag_to_string(x)
+                    current_articles = []
+                    self.log('\tFound section:', current_section)
+                if current_section is not None and x.name == 'strong':
+                    title = self.tag_to_string(x)
+                    a = x.parent.parent.find('a', href=lambda x: x and '/full/' in x)
+                    if a is None:
+                        continue
+                    url = a.get('href', False)
+                    if not url or not title:
+                        continue
+                    if url.startswith('/'):
+                        url = 'http://jasn.asnjournals.org'+url
+                    self.log('\t\tFound article:', title)
+                    self.log('\t\t\t', url)
+                    current_articles.append({'title': title, 'url':url,
+                        'description':'', 'date':''})
+
+            if current_articles and current_section:
+                feeds.append((current_section, current_articles))
+
+            return feeds
            


@ -59,7 +101,7 @@ class JASN(BasicNewsRecipe):
            if not url:
                continue
            if url.startswith('/'):
-                url = 'http://jasn.asnjournals.org/'+url
+                url = 'http://jasn.asnjournals.org'+url
                isoup = self.index_to_soup(url)
                img = isoup.find('img', src=lambda x: x and
                x.startswith('/content/'))