mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
News24 and Nuus24 by Nicki de Wet
This commit is contained in:
parent
2489b7a82d
commit
7cf205f0c7
53
recipes/news24.recipe
Normal file
53
recipes/news24.recipe
Normal file
@@ -0,0 +1,53 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1375900744(BasicNewsRecipe):
    """Recipe for the English-language News24 site, driven by its RSS feeds.

    The class only declares settings; all fetching and conversion logic is
    inherited from ``BasicNewsRecipe``.
    """

    # --- Publication metadata -------------------------------------------
    title = u'News24'
    description = "News24."
    __author__ = 'Nicki de Wet'
    publisher = 'Media24'
    category = 'news, politics, South Africa'
    language = 'en_ZA'
    publication_type = 'newsportal'
    masthead_url = 'http://www.24.com/images/widgethead_news.png'

    # --- Download / clean-up switches -----------------------------------
    oldest_article = 3
    max_articles_per_feed = 20
    encoding = 'utf8'
    no_stylesheets = True
    use_embedded_content = False
    auto_cleanup = False
    remove_empty_feeds = True

    # Extra CSS applied on top of the downloaded article markup.
    extra_css = """
        body{font-family: Arial,Helvetica,sans-serif }
        img{display: block}
    """

    # Metadata handed to the conversion step; mirrors the attributes above.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    # Elements stripped from each article: embedded/structural tags first,
    # then page furniture matched by CSS class, then by element id.
    remove_tags = [
        dict(name=['object', 'embed', 'iframe', 'table', 'meta', 'link']),
        dict(attrs={'class': [
            'TwitterfacebookLink',
            'superSportArticleBlock',
            'videoHighlights',
            'facebookComments',
            'share',
            'item_block',
            'kalahari_product left',
            'block red',
            'credit',
        ]}),
        dict(attrs={'id': [
            'comments_wrap',
            'article_toolbox_bot',
            'inside_news',
            'sponsored-links',
            'lnkGalleries',
            'relatedlinks_box',
            'lnkUserGalleries',
            'lnkNewsGalleries',
            'relatedlinks',
            'divRelatedLinks',
        ]}),
    ]

    # Only elements carrying one of these classes are kept.
    keep_only_tags = [
        dict(attrs={'class': [
            'left col633',
            'article col626',
            'columnWrapperLeft',
            'articlecolumn',
            'article_img',
            'picture_caption',
            'DiveTable',
        ]}),
    ]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Top Stories',
         u'http://feeds.news24.com/articles/news24/TopStories/rss'),
        (u'South Africa',
         u'http://feeds.news24.com/articles/news24/SouthAfrica/rss'),
        (u'World',
         u'http://feeds.news24.com/articles/news24/World/rss'),
        (u'Sport',
         u'http://feeds.24.com/articles/sport/featured/topstories/rss'),
    ]
|
57
recipes/nuus24.recipe
Normal file
57
recipes/nuus24.recipe
Normal file
@@ -0,0 +1,57 @@
|
||||
import re
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class Nuus24(BasicNewsRecipe):
    """Recipe for Nuus24, the Afrikaans news site.

    The article list is scraped from the site's index page in
    ``parse_index`` rather than taken from RSS feeds.
    """

    title = 'Nuus24'
    __author__ = 'Nicki de Wet'
    encoding = 'utf-8'
    description = 'Daaglikse Afrikaanse Nuus via Nuus24'
    language = 'af'
    publisher = 'Media24'
    timefmt = ' [%a, %d %b, %Y]'
    masthead_url = 'http://afrikaans.news24.com/images/nuus.jpg'
    max_articles_per_feed = 25

    # Trim everything outside the element with id "TheFeed".
    remove_tags_before = dict(id='TheFeed')
    remove_tags_after = dict(id='TheFeed')

    # Page furniture removed from each article: by CSS class, by id, and
    # all script/style elements.
    remove_tags = [
        dict(attrs={'class': [
            'personal-bar row-fluid', 'navbar main-menu-fixed',
            'breaking-news-wrapper', 'row-fluid comments-bg',
            'unstyled actions', 'modal-body', 'modal-header', 'desktop',
        ]}),
        dict(id=['weather-forecast', 'topics', 'side-widgets',
                 'footer-container', 'sb-container', 'myModal']),
        dict(name=['script', 'noscript', 'style']),
    ]

    keep_only_tags = [
        dict(attrs={'class': ['span8 border-right']}),
        dict(name=['article', 'section']),
        dict(id=['img-wrapper']),
    ]

    extra_css = """ div.carousel-inner{ overflow:hidden;display: block;height:300px;} img{display: block} """
    no_stylesheets = True

    def parse_index(self):
        """Build the article index from the Nuus24 front page.

        Every element on the index page whose id is ``lnkLink`` is treated
        as a link to an article. The query string is stripped from each
        link and all articles are collected into one feed.

        Returns:
            A one-element list ``[(feed_title, articles)]`` where
            ``articles`` is a list of dicts with the keys ``title``,
            ``url``, ``date``, ``description`` and ``content``.
        """
        soup = self.index_to_soup('http://afrikaans.news24.com/Index.aspx')

        feed_name = 'Nuus in Afrikaans'
        # The page gives no per-article date here, so every entry is
        # stamped with the current date; computed once for the whole run.
        pubdate = strftime('%a, %d %b')

        articles = []
        for anchor in soup.findAll(True, attrs={'id': ['lnkLink']}):
            href = anchor.get('href')
            if not href:
                # A matching element without a link target cannot be
                # fetched; skip it rather than abort the whole download.
                continue
            url = re.sub(r'\?.*', '', href)
            title = self.tag_to_string(anchor, use_alt=True).strip()
            # Report progress through the recipe logger (the original used
            # a Python 2 ``print`` statement, a syntax error on Python 3).
            self.log(title)
            articles.append(
                dict(title=title, url=url, date=pubdate,
                     description='', content=''))

        return [(feed_name, articles)]
|
Loading…
x
Reference in New Issue
Block a user