Update Frontline

2025-07-09 03:04:10 -04:00 · 2013-06-12 17:58:48 +05:30 · 2013-06-12 17:58:48 +05:30 · 2f8a25654b
commit 2f8a25654b
parent c07db5e194
1 changed file with 26 additions and 27 deletions
--- a/recipes/frontlineonnet.recipe
+++ b/recipes/frontlineonnet.recipe
@@ -1,3 +1,4 @@
 __license__   = 'GPL v3'
 __copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
@@ -5,7 +6,6 @@ frontlineonnet.com
 '''
 import re
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class Frontlineonnet(BasicNewsRecipe):
@@ -18,7 +18,7 @@ class Frontlineonnet(BasicNewsRecipe):
    delay                = 1
    INDEX                = 'http://frontlineonnet.com/'
    use_embedded_content = False
-    encoding             = 'cp1252'
+    encoding             = 'utf-8'
    language             = 'en_IN'
    publication_type     = 'magazine'
    masthead_url         = 'http://frontlineonnet.com/images/newfline.jpg'
@@ -45,37 +45,36 @@ class Frontlineonnet(BasicNewsRecipe):
                         ]
    keep_only_tags= [
-                      dict(name='font', attrs={'class':'storyhead'})
+                      dict(name='div', attrs={'id':'content'})
-                     ,dict(attrs={'class':'byline'})
+                     #,dict(attrs={'class':'byline'})
                    ]
-    remove_attributes=['size','noshade','border']
+    #remove_attributes=['size','noshade','border']
-    def preprocess_html(self, soup):
+    #def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
+        #for item in soup.findAll(style=True):
-            del item['style']
+            #del item['style']
-        for item in soup.findAll('img'):
+        #for item in soup.findAll('img'):
-            if not item.has_key('alt'):
+            #if not item.has_key('alt'):
-               item['alt'] = 'image'
+               #item['alt'] = 'image'
-        return soup
+        #return soup
    def parse_index(self):
        articles = []
        soup = self.index_to_soup(self.INDEX)
-        for feed_link in soup.findAll('a',href=True):
+        for feed_link in soup.findAll('div', id='headseccol'):
-            if feed_link['href'].startswith('stories/'):
+            a = feed_link.find('a', href=True)
-                url   = self.INDEX + feed_link['href']
+            title = self.tag_to_string(a)
-                title = self.tag_to_string(feed_link)
+            url = a['href']
-                date  = strftime(self.timefmt)
+            articles.append({
-                articles.append({
+                              'title'      :title
-                                  'title'      :title
+                             ,'date'       :''
-                                 ,'date'       :date
+                             ,'url'        :url
-                                 ,'url'        :url
+                             ,'description':''
-                                 ,'description':''
+                            })
                                })
        return [('Frontline', articles)]
-    def print_version(self, url):
+    #def print_version(self, url):
-        return "http://www.hinduonnet.com/thehindu/thscrip/print.pl?prd=fline&file=" + url.rpartition('/')[2]
+        #return "http://www.hinduonnet.com/thehindu/thscrip/print.pl?prd=fline&file=" + url.rpartition('/')[2]
-    def image_url_processor(self, baseurl, url):
+    #def image_url_processor(self, baseurl, url):
-        return url.replace('../images/', self.INDEX + 'images/').strip()
+        #return url.replace('../images/', self.INDEX + 'images/').strip()