From 7a6634d405739675d1dfd25cc55ed32912c701da Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 9 Feb 2011 21:15:20 -0700
Subject: [PATCH] Kompas and Jakarta Post by Adrian Gunawan

---
Review notes (this section, between the "---" marker and the diffstat, is
not part of the commit message and does not affect the hunks below):

* The line structure of this patch was restored from a whitespace-collapsed
  copy. Every hunk has the size its header announces (67, 77 and 7 lines),
  but indentation and any column alignment are reconstructed, not original.
  If the context of the localization.py hunk does not match the target file,
  apply with --ignore-whitespace.
* kompas.recipe, preprocess_regexps: the first pattern is r'.*' compiled
  with re.DOTALL, so it would match the whole downloaded page and replace it
  with ''. The fourth pattern starts with a stray "]*>". Both look as if
  HTML tags were stripped out of the patterns in transit -- TODO confirm
  against the original commit before relying on this recipe.
* The "From:" header and both __copyright__ strings stop right after the
  name; presumably an address in angle brackets was lost the same way --
  TODO confirm.

 resources/recipes/jakarta_post.recipe | 67 +++++++++++++++++++++++
 resources/recipes/kompas.recipe       | 77 +++++++++++++++++++++++++++
 src/calibre/utils/localization.py     |  1 +
 3 files changed, 145 insertions(+)
 create mode 100644 resources/recipes/jakarta_post.recipe
 create mode 100644 resources/recipes/kompas.recipe

diff --git a/resources/recipes/jakarta_post.recipe b/resources/recipes/jakarta_post.recipe
new file mode 100644
index 0000000000..d8d609469d
--- /dev/null
+++ b/resources/recipes/jakarta_post.recipe
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2011, Adrian Gunawan '
+__author__ = 'Adrian Gunawan'
+__version__ = 'v1.0'
+__date__ = '02 February 2011'
+
+'''
+http://www.thejakartapost.com/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class JakartaPost(BasicNewsRecipe):
+    title = u'Jakarta Post'
+    masthead_url = 'http://www.thejakartapost.com/images/jakartapost_logo.jpg'
+    cover_url = 'http://www.thejakartapost.com/images/jakartapost_logo.jpg'
+
+    __author__ = u'Adrian Gunawan'
+    description = u'Indonesian Newspaper in English from Jakarta Post Online Edition'
+    category = 'breaking news, national, business, international, Indonesia'
+    language = 'en_ID'
+    oldest_article = 2
+    max_articles_per_feed = 100
+
+    no_stylesheets = True
+    use_embedded_content = False
+    no_javascript = True
+    remove_empty_feeds = True
+
+    timefmt = ' [%A, %d %B, %Y]'
+    encoding = 'utf-8'
+
+    keep_only_tags = [dict(name='div', attrs ={'id':'news-main'})]
+
+    extra_css = '''
+        h1{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large;}
+        .cT-storyDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
+        .articleBody{font-family:Arial,Helvetica,sans-serif; color:black;font-size:small;}
+        .cT-imageLandscape{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:x-small;}
+        .source{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:xx-small;}
+        #content{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
+        .pageprint{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        #bylineDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
+        .featurePic-wide{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
+        #idfeaturepic{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
+        h3{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
+        h2{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
+        h4{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
+        h5{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
+        body{font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
+    '''
+
+    remove_tags = [
+        dict(name='div', attrs ={'class':['text-size']}),
+    ]
+
+    feeds = [
+
+        (u'Breaking News', u'http://www.thejakartapost.com/breaking/feed'),
+        (u'National', u'http://www.thejakartapost.com/channel/national/feed'),
+        (u'Archipelago', u'http://www.thejakartapost.com/channel/archipelago/feed'),
+        (u'Business', u'http://www.thejakartapost.com/channel/business/feed'),
+        (u'Jakarta', u'http://www.thejakartapost.com/channel/jakarta/feed'),
+        (u'World', u'http://www.thejakartapost.com/channel/world/feed'),
+        (u'Sports', u'http://www.thejakartapost.com/channel/sports/feed'),
+    ]
diff --git a/resources/recipes/kompas.recipe b/resources/recipes/kompas.recipe
new file mode 100644
index 0000000000..2f2804d59a
--- /dev/null
+++ b/resources/recipes/kompas.recipe
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2011, Adrian Gunawan '
+__author__ = 'Adrian Gunawan'
+__version__ = 'v1.0'
+__date__ = '02 February 2011'
+
+'''
+http://www.kompas.com/
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Kompas(BasicNewsRecipe):
+    title = u'Kompas'
+    masthead_url = 'http://stat.k.kidsklik.com/data/2k10/kompascom2011/images/logo_kompas.png'
+    cover_url = 'http://stat.k.kidsklik.com/data/2k10/kompascom2011/images/logo_kompas.png'
+
+    __author__ = u'Adrian Gunawan'
+    description = u'Indonesian News from Kompas Online Edition'
+    category = 'local news, international, business, Indonesia'
+    language = 'id'
+    oldest_article = 5
+    max_articles_per_feed = 100
+
+    no_stylesheets = True
+    use_embedded_content = False
+    no_javascript = True
+    remove_empty_feeds = True
+
+    timefmt = ' [%A, %d %B, %Y]'
+    encoding = 'utf-8'
+
+    keep_only_tags = [dict(name='div', attrs ={'class':'content_kiri_detail'})]
+
+    extra_css = '''
+        h1{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large;}
+        .cT-storyDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
+        .articleBody{font-family:Arial,Helvetica,sans-serif; color:black;font-size:small;}
+        .cT-imageLandscape{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:x-small;}
+        .source{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:xx-small;}
+        #content{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
+        .pageprint{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        #bylineDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
+        .featurePic-wide{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
+        #idfeaturepic{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
+        h3{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
+        h2{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
+        h4{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
+        h5{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
+        body{font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
+    '''
+
+    remove_tags = [
+        dict(name='div', attrs ={'class':['c_biru_kompas2011', 'c_abu01_kompas2011', 'c_abu_01_kompas2011', 'right', 'clearit']}),
+        dict(name='div', attrs ={'id':['comment_list', 'comment_paging', 'share']}),
+        dict(name='form'),
+        dict(name='ul'),
+    ]
+
+    preprocess_regexps = [
+        (re.compile(r'.*', re.DOTALL|re.IGNORECASE),lambda match: ''),
+        (re.compile(r'Sent Using.*', re.DOTALL|re.IGNORECASE),lambda match: ''),
+        (re.compile(r'Kirim Komentar Anda', re.DOTALL|re.IGNORECASE),lambda match: ''),
+        (re.compile(r']*>Kembali ke Index Topik Pilihan', re.DOTALL|re.IGNORECASE),lambda match: ''),
+    ]
+
+    feeds = [
+        (u'Nasional', u'http://www.kompas.com/getrss/nasional'),
+        (u'Regional', u'http://www.kompas.com/getrss/regional'),
+        (u'Internasional', u'http://www.kompas.com/getrss/internasional'),
+        (u'Megapolitan', u'http://www.kompas.com/getrss/megapolitan'),
+        (u'Bisnis Keuangan', u'http://www.kompas.com/getrss/bisniskeuangan'),
+        (u'Kesehatan', u'http://www.kompas.com/getrss/kesehatan'),
+        (u'Olahraga', u'http://www.kompas.com/getrss/olahraga'),
+    ]
diff --git a/src/calibre/utils/localization.py b/src/calibre/utils/localization.py
index 97356df081..1f869a6475 100644
--- a/src/calibre/utils/localization.py
+++ b/src/calibre/utils/localization.py
@@ -107,6 +107,7 @@ _extra_lang_codes = {
         'en_CZ' : _('English (Czechoslovakia)'),
         'en_PK' : _('English (Pakistan)'),
         'en_HR' : _('English (Croatia)'),
+        'en_ID' : _('English (Indonesia)'),
         'en_IL' : _('English (Israel)'),
         'en_SG' : _('English (Singapore)'),
         'en_YE' : _('English (Yemen)'),