mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Command used: futurize --no-diffs -f libfuturize.fixes.fix_print_with_import -f lib2to3.fixes.fix_throw -f lib2to3.fixes.fix_numliterals -f lib2to3.fixes.fix_except -f lib2to3.fixes.fix_exec -f lib2to3.fixes.fix_raise -f lib2to3.fixes.fix_tuple_params -f lib2to3.fixes.fix_ne -j20 -w -n setup recipes src manual setup.py recipes/*.recipe And manual adjustments of print((...)) -> print(...)
61 lines
2.2 KiB
Plaintext
61 lines
2.2 KiB
Plaintext
from __future__ import print_function
|
|
import re
|
|
from calibre import strftime
|
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
|
|
|
|
class Nuus24(BasicNewsRecipe):
    """Recipe for Nuus24, the Afrikaans news site at afrikaans.news24.com.

    Every article linked from the site's index page is collected into a
    single feed named 'Nuus in Afrikaans' (see :meth:`parse_index`).
    """

    # --- Publication metadata -------------------------------------------
    title = 'Nuus24'
    __author__ = 'Nicki de Wet'
    encoding = 'utf-8'
    description = 'Daaglikse Afrikaanse Nuus via Nuus24'
    language = 'af'
    publisher = 'Media24'
    timefmt = ' [%a, %d %b, %Y]'
    masthead_url = 'http://afrikaans.news24.com/images/nuus.jpg'
    max_articles_per_feed = 25

    # --- Article page clean-up ------------------------------------------
    # Both boundaries point at the same element id ('TheFeed').
    remove_tags_before = dict(id='TheFeed')
    remove_tags_after = dict(id='TheFeed')
    remove_tags = [
        # Elements selected by CSS class.
        dict(attrs={
            'class': [
                'personal-bar row-fluid', 'navbar main-menu-fixed',
                'breaking-news-wrapper', 'row-fluid comments-bg',
                'unstyled actions', 'modal-body', 'modal-header', 'desktop',
            ],
        }),
        # Elements selected by id.
        dict(id=[
            'weather-forecast', 'topics', 'side-widgets',
            'footer-container', 'sb-container', 'myModal',
        ]),
        # Non-content tags.
        dict(name=['script', 'noscript', 'style']),
    ]

    keep_only_tags = [
        dict(attrs={'class': ['span8 border-right']}),
        dict(name=['article', 'section']),
        dict(id=['img-wrapper']),
    ]
    extra_css = """ div.carousel-inner{ overflow:hidden;display: block;height:300px;} img{display: block} """
    no_stylesheets = True

    def parse_index(self):
        """Build the list of feeds from the Nuus24 index page.

        Every element on the index page whose ``id`` is ``lnkLink`` is
        treated as an article link. Elements without an ``href`` are
        skipped instead of aborting the whole download with a ``KeyError``.

        Returns:
            A one-element list ``[(feed_title, articles)]``. Each article
            is a dict with the keys ``title``, ``url``, ``date``,
            ``description`` and ``content``.
        """
        soup = self.index_to_soup('http://afrikaans.news24.com/Index.aspx')

        feed = 'Nuus in Afrikaans'
        # The same date string is used for every article, so compute it
        # once. strftime is called without a time argument (presumably it
        # formats the current local time -- confirm against calibre).
        pubdate = strftime('%a, %d %b')

        articles = []
        for anchor in soup.findAll(True, attrs={'id': ['lnkLink']}):
            href = anchor.get('href')
            if not href:
                # Matched element carries no link target; nothing to fetch.
                continue
            # Strip the query string so only the bare article URL remains.
            url = re.sub(r'\?.*', '', href)
            title = self.tag_to_string(anchor, use_alt=True).strip()
            # Report progress through the recipe logger rather than stdout.
            self.log(title)
            articles.append(
                dict(title=title, url=url, date=pubdate,
                     description='',
                     content=''))

        return [(feed, articles)]
|