mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #892860 (Updated recipe for NIN online)
This commit is contained in:
parent
0d7ca78a5e
commit
408a71c485
@ -80,59 +80,11 @@ class Nin(BasicNewsRecipe):
|
||||
return self.PREFIX + item.img['src']
|
||||
return cover_url
|
||||
|
||||
def parse_index(self):
|
||||
articles = []
|
||||
count = 0
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
for item in soup.findAll('a',attrs={'class':'lmeninavFont'}):
|
||||
count = count +1
|
||||
if self.test and count > 2:
|
||||
return articles
|
||||
section = self.tag_to_string(item)
|
||||
feedlink = self.PREFIX + item['href']
|
||||
feedpage = self.index_to_soup(feedlink)
|
||||
self.report_progress(0, _('Fetching feed')+' %s...'%(section))
|
||||
inarts = []
|
||||
for art in feedpage.findAll('span',attrs={'class':'artTitle'}):
|
||||
alink = art.parent
|
||||
url = self.PREFIX + alink['href']
|
||||
title = self.tag_to_string(art)
|
||||
sparent = alink.parent
|
||||
alink.extract()
|
||||
description = self.tag_to_string(sparent)
|
||||
date = strftime(self.timefmt)
|
||||
inarts.append({
|
||||
'title' :title
|
||||
,'date' :date
|
||||
,'url' :url
|
||||
,'description':description
|
||||
})
|
||||
articles.append((section,inarts))
|
||||
return articles
|
||||
|
||||
def index_to_soup(self, url_or_raw, raw=False):
|
||||
if re.match(r'\w+://', url_or_raw):
|
||||
open_func = getattr(self.browser, 'open_novisit', self.browser.open)
|
||||
with closing(open_func(url_or_raw)) as f:
|
||||
_raw = f.read()
|
||||
if not _raw:
|
||||
raise RuntimeError('Could not fetch index from %s'%url_or_raw)
|
||||
else:
|
||||
_raw = url_or_raw
|
||||
if raw:
|
||||
return _raw
|
||||
if not isinstance(_raw, unicode) and self.encoding:
|
||||
if callable(self.encoding):
|
||||
_raw = self.encoding(_raw)
|
||||
else:
|
||||
_raw = _raw.decode(self.encoding, 'replace')
|
||||
massage = list(BeautifulSoup.MARKUP_MASSAGE)
|
||||
enc = 'cp1252' if callable(self.encoding) or self.encoding is None else self.encoding
|
||||
massage.append((re.compile(r'&(\S+?);'), lambda match:
|
||||
entity_to_unicode(match, encoding=enc)))
|
||||
massage.append((re.compile(r'[\x00-\x08]+'), lambda match:
|
||||
''))
|
||||
return BeautifulSoup(_raw, markupMassage=massage)
|
||||
feeds = [(u'NIN Online', u'http://www.nin.co.rs/misc/rss.php?feed=RSS2.0')]
|
||||
|
||||
def get_article_url(self, article):
|
||||
url = BasicNewsRecipe.get_article_url(self, article)
|
||||
return url.replace('.co.yu', '.co.rs')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
|
Loading…
x
Reference in New Issue
Block a user