Fix #892860 (Updated recipe for NIN online)

2025-07-09 03:04:10 -04:00 · 2011-11-21 08:02:24 +05:30 · 2011-11-21 08:02:24 +05:30 · 408a71c485
commit 408a71c485
parent 0d7ca78a5e
1 changed file with 5 additions and 53 deletions
--- a/recipes/nin.recipe
+++ b/recipes/nin.recipe
@@ -80,59 +80,11 @@ class Nin(BasicNewsRecipe):
                   return self.PREFIX + item.img['src']
        return cover_url

-    def parse_index(self):
-        articles = []
-        count = 0
-        soup = self.index_to_soup(self.INDEX)
-        for item in soup.findAll('a',attrs={'class':'lmeninavFont'}):
-            count = count +1
-            if self.test and count > 2:
-               return articles
-            section  = self.tag_to_string(item)
-            feedlink = self.PREFIX + item['href']
-            feedpage = self.index_to_soup(feedlink)
-            self.report_progress(0, _('Fetching feed')+' %s...'%(section))
-            inarts   = []
-            for art in feedpage.findAll('span',attrs={'class':'artTitle'}):
-                alink = art.parent
-                url   = self.PREFIX + alink['href']
-                title = self.tag_to_string(art)
-                sparent = alink.parent
-                alink.extract()
-                description = self.tag_to_string(sparent)
-                date = strftime(self.timefmt)
-                inarts.append({
-                                  'title'      :title
-                                 ,'date'       :date
-                                 ,'url'        :url
-                                 ,'description':description
-                                })
-            articles.append((section,inarts))
-        return articles
-
-    def index_to_soup(self, url_or_raw, raw=False):
-        if re.match(r'\w+://', url_or_raw):
-            open_func = getattr(self.browser, 'open_novisit', self.browser.open)
-            with closing(open_func(url_or_raw)) as f:
-                _raw = f.read()
-            if not _raw:
-                raise RuntimeError('Could not fetch index from %s'%url_or_raw)
-        else:
-            _raw = url_or_raw
-        if raw:
-            return _raw
-        if not isinstance(_raw, unicode) and self.encoding:
-            if callable(self.encoding):
-                _raw = self.encoding(_raw)
-            else:
-                _raw = _raw.decode(self.encoding, 'replace')
-        massage = list(BeautifulSoup.MARKUP_MASSAGE)
-        enc = 'cp1252' if callable(self.encoding) or self.encoding is None else self.encoding
-        massage.append((re.compile(r'&(\S+?);'), lambda match:
-            entity_to_unicode(match, encoding=enc)))
-        massage.append((re.compile(r'[\x00-\x08]+'), lambda match:
-            ''))
-        return BeautifulSoup(_raw, markupMassage=massage)
+    feeds          = [(u'NIN Online', u'http://www.nin.co.rs/misc/rss.php?feed=RSS2.0')]
+    
+    def get_article_url(self, article):
+        url = BasicNewsRecipe.get_article_url(self, article)
+        return url.replace('.co.yu', '.co.rs')

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):