The Galaxy's Edge by Krittika Goyal

2025-08-30 23:00:21 -04:00 · 2013-04-05 15:50:44 +05:30 · 2013-04-05 15:50:44 +05:30 · 9a1d1c4fee
commit 9a1d1c4fee
parent 053c84b57f
1 changed files with 108 additions and 0 deletions
--- a/recipes/galaxys_edge.recipe
+++ b/recipes/galaxys_edge.recipe
@ -0,0 +1,108 @@
+from __future__ import with_statement
+__license__ = 'GPL 3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class GalaxyEdge(BasicNewsRecipe):
+    title                 = u'The Galaxy\'s Edge'
+    language = 'en'
+
+    oldest_article        = 7
+    __author__            = 'Krittika Goyal'
+    no_stylesheets = True
+
+    auto_cleanup = True
+
+    #keep_only_tags = [dict(id='content')]
+    #remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
+            #dict(id=['email-section', 'right-column', 'printfooter', 'topover',
+                     #'slidebox', 'th_footer'])]
+
+    extra_css = '.photo-caption { font-size: smaller }'
+
+    def parse_index(self):
+        soup = self.index_to_soup('http://www.galaxysedge.com/')
+        main = soup.find('table', attrs={'width':'911'})
+        toc = main.find('td', attrs={'width':'225'})
+
+
+
+        current_section = None
+        current_articles = []
+        feeds = []
+        c = 0
+        for x in toc.findAll(['p']):
+            c = c+1
+            if c == 5:
+                if current_articles and current_section:
+                    feeds.append((current_section, current_articles))
+                edwo = x.find('a')
+                current_section = self.tag_to_string(edwo)
+                current_articles = []
+                self.log('\tFound section:', current_section)
+                title = self.tag_to_string(edwo)
+                url = edwo.get('href', True)
+                url = 'http://www.galaxysedge.com/'+url
+                print(title)
+                print(c)
+                if not url or not title:
+                    continue
+                self.log('\t\tFound article:', title)
+                self.log('\t\t\t', url)
+                current_articles.append({'title': title, 'url':url,
+                    'description':'', 'date':''})
+            elif c>5:
+                current_section = self.tag_to_string(x.find('b'))
+                current_articles = []
+                self.log('\tFound section:', current_section)
+                for y in x.findAll('a'):
+                    title = self.tag_to_string(y)
+                    url = y.get('href', True)
+                    url = 'http://www.galaxysedge.com/'+url
+                    print(title)
+                    if not url or not title:
+                        continue
+                    self.log('\t\tFound article:', title)
+                    self.log('\t\t\t', url)
+                    current_articles.append({'title': title, 'url':url,
+                        'description':'', 'date':''})
+            if current_articles and current_section:
+                 feeds.append((current_section, current_articles))
+
+        return feeds
+
+
+
+
+    #def preprocess_raw_html(self, raw, url):
+        #return raw.replace('<body><p>', '<p>').replace('</p></body>', '</p>')
+
+    #def postprocess_html(self, soup, first_fetch):
+        #for t in soup.findAll(['table', 'tr', 'td','center']):
+            #t.name = 'div'
+        #return soup
+
+    #def parse_index(self):
+        #today = time.strftime('%Y-%m-%d')
+        #soup = self.index_to_soup(
+                #'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
+        #div = soup.find(id='left-column')
+        #feeds = []
+        #current_section = None
+        #current_articles = []
+        #for x in div.findAll(['h3', 'div']):
+            #if current_section and x.get('class', '') == 'tpaper':
+                #a = x.find('a', href=True)
+                #if a is not None:
+                    #current_articles.append({'url':a['href']+'?css=print',
+                        #'title':self.tag_to_string(a), 'date': '',
+                        #'description':''})
+            #if x.name == 'h3':
+                #if current_section and current_articles:
+                    #feeds.append((current_section, current_articles))
+                #current_section = self.tag_to_string(x)
+                #current_articles = []
+        #return feeds
+
+