Kompas and Jakarta Post by Adrian Gunawan

This commit is contained in:
Kovid Goyal 2011-02-09 21:15:20 -07:00
parent dfcfc697d5
commit 7a6634d405
3 changed files with 145 additions and 0 deletions

View File

@ -0,0 +1,67 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2011, Adrian Gunawan <agunawan at adrnalin.com>'
__author__ = 'Adrian Gunawan'
__version__ = 'v1.0'
__date__ = '02 February 2011'
'''
http://www.thejakartapost.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class JakartaPost(BasicNewsRecipe):
title = u'Jakarta Post'
masthead_url = 'http://www.thejakartapost.com/images/jakartapost_logo.jpg'
cover_url = 'http://www.thejakartapost.com/images/jakartapost_logo.jpg'
__author__ = u'Adrian Gunawan'
description = u'Indonesian Newspaper in English from Jakarta Post Online Edition'
category = 'breaking news, national, business, international, Indonesia'
language = 'en_ID'
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
no_javascript = True
remove_empty_feeds = True
timefmt = ' [%A, %d %B, %Y]'
encoding = 'utf-8'
keep_only_tags = [dict(name='div', attrs ={'id':'news-main'})]
extra_css = '''
h1{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large;}
.cT-storyDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
.articleBody{font-family:Arial,Helvetica,sans-serif; color:black;font-size:small;}
.cT-imageLandscape{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:x-small;}
.source{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:xx-small;}
#content{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
.pageprint{font-family:Arial,Helvetica,sans-serif;font-size:small;}
#bylineDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
.featurePic-wide{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
#idfeaturepic{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
h3{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h2{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h4{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h5{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
body{font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
'''
remove_tags = [
dict(name='div', attrs ={'class':['text-size']}),
]
feeds = [
(u'Breaking News', u'http://www.thejakartapost.com/breaking/feed'),
(u'National', u'http://www.thejakartapost.com/channel/national/feed'),
(u'Archipelago', u'http://www.thejakartapost.com/channel/archipelago/feed'),
(u'Business', u'http://www.thejakartapost.com/channel/business/feed'),
(u'Jakarta', u'http://www.thejakartapost.com/channel/jakarta/feed'),
(u'World', u'http://www.thejakartapost.com/channel/world/feed'),
(u'Sports', u'http://www.thejakartapost.com/channel/sports/feed'),
]

View File

@ -0,0 +1,77 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2011, Adrian Gunawan <agunawan at adrnalin.com>'
__author__ = 'Adrian Gunawan'
__version__ = 'v1.0'
__date__ = '02 February 2011'
'''
http://www.kompas.com/
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Kompas(BasicNewsRecipe):
title = u'Kompas'
masthead_url = 'http://stat.k.kidsklik.com/data/2k10/kompascom2011/images/logo_kompas.png'
cover_url = 'http://stat.k.kidsklik.com/data/2k10/kompascom2011/images/logo_kompas.png'
__author__ = u'Adrian Gunawan'
description = u'Indonesian News from Kompas Online Edition'
category = 'local news, international, business, Indonesia'
language = 'id'
oldest_article = 5
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
no_javascript = True
remove_empty_feeds = True
timefmt = ' [%A, %d %B, %Y]'
encoding = 'utf-8'
keep_only_tags = [dict(name='div', attrs ={'class':'content_kiri_detail'})]
extra_css = '''
h1{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large;}
.cT-storyDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
.articleBody{font-family:Arial,Helvetica,sans-serif; color:black;font-size:small;}
.cT-imageLandscape{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:x-small;}
.source{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:xx-small;}
#content{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
.pageprint{font-family:Arial,Helvetica,sans-serif;font-size:small;}
#bylineDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
.featurePic-wide{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
#idfeaturepic{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
h3{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h2{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h4{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
h5{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
body{font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
'''
remove_tags = [
dict(name='div', attrs ={'class':['c_biru_kompas2011', 'c_abu01_kompas2011', 'c_abu_01_kompas2011', 'right', 'clearit']}),
dict(name='div', attrs ={'id':['comment_list', 'comment_paging', 'share']}),
dict(name='form'),
dict(name='ul'),
]
preprocess_regexps = [
(re.compile(r'<!--TERKAIT -->.*<!--TERKAIT END -->', re.DOTALL|re.IGNORECASE),lambda match: ''),
(re.compile(r'<strong>Sent Using.*</body>', re.DOTALL|re.IGNORECASE),lambda match: ''),
(re.compile(r'<strong>Kirim Komentar Anda</strong>', re.DOTALL|re.IGNORECASE),lambda match: ''),
(re.compile(r'<a[^>]*>Kembali ke Index Topik Pilihan</a>', re.DOTALL|re.IGNORECASE),lambda match: ''),
]
feeds = [
(u'Nasional', u'http://www.kompas.com/getrss/nasional'),
(u'Regional', u'http://www.kompas.com/getrss/regional'),
(u'Internasional', u'http://www.kompas.com/getrss/internasional'),
(u'Megapolitan', u'http://www.kompas.com/getrss/megapolitan'),
(u'Bisnis Keuangan', u'http://www.kompas.com/getrss/bisniskeuangan'),
(u'Kesehatan', u'http://www.kompas.com/getrss/kesehatan'),
(u'Olahraga', u'http://www.kompas.com/getrss/olahraga'),
]

View File

@ -107,6 +107,7 @@ _extra_lang_codes = {
'en_CZ' : _('English (Czechoslovakia)'),
'en_PK' : _('English (Pakistan)'),
'en_HR' : _('English (Croatia)'),
'en_ID' : _('English (Indonesia)'),
'en_IL' : _('English (Israel)'),
'en_SG' : _('English (Singapore)'),
'en_YE' : _('English (Yemen)'),