#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2011, Adrian Gunawan '
__author__ = 'Adrian Gunawan'
__version__ = 'v1.0'
__date__ = '02 February 2011'

'''
http://www.kompas.com/
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe


class Kompas(BasicNewsRecipe):
    """Calibre news recipe for the online edition of Kompas (Indonesian).

    The recipe is purely declarative: it lists the RSS feeds to fetch, the
    page container to keep, the elements to drop, and the regular
    expressions applied to the raw HTML before it is parsed.
    """

    title = u'Kompas'
    # The site logo is used both as the masthead and as the cover image.
    masthead_url = 'http://stat.k.kidsklik.com/data/2k10/kompascom2011/images/logo_kompas.png'
    cover_url = 'http://stat.k.kidsklik.com/data/2k10/kompascom2011/images/logo_kompas.png'
    __author__ = u'Adrian Gunawan'
    description = u'Indonesian News from Kompas Online Edition'
    category = 'local news, international, business, Indonesia'
    language = 'id'

    oldest_article = 5
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    no_javascript = True
    remove_empty_feeds = True
    timefmt = ' [%A, %d %B, %Y]'
    encoding = 'utf-8'

    # Only the article container is kept; everything outside it is dropped.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'content_kiri_detail'}),
    ]

    # One rule per line. Font names in quotes must stay on a single line:
    # a raw line break inside a quoted CSS string is invalid CSS.
    extra_css = '''
    h1{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large;}
    .cT-storyDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
    .articleBody{font-family:Arial,Helvetica,sans-serif; color:black;font-size:small;}
    .cT-imageLandscape{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:x-small;}
    .source{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:xx-small;}
    #content{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
    .pageprint{font-family:Arial,Helvetica,sans-serif;font-size:small;}
    #bylineDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
    .featurePic-wide{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
    #idfeaturepic{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
    h3{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
    h2{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
    h4{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
    h5{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
    body{font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
    '''

    # Elements removed from inside the kept container: styled boxes,
    # comment widgets, share buttons, forms and all unordered lists.
    remove_tags = [
        dict(name='div', attrs={'class': [
            'c_biru_kompas2011',
            'c_abu01_kompas2011',
            'c_abu_01_kompas2011',
            'right',
            'clearit',
        ]}),
        dict(name='div', attrs={'id': [
            'comment_list',
            'comment_paging',
            'share',
        ]}),
        dict(name='form'),
        dict(name='ul'),
    ]

    # (pattern, replacement) pairs; every match is replaced by ''.
    #
    # NOTE(review): this list used to start with the bare pattern r'.*'
    # compiled with re.DOTALL, which matches the whole page and therefore
    # replaced every article with an empty string. That rule is dropped here.
    # Presumably it once targeted a specific element - confirm against the
    # upstream recipe before re-adding a narrower pattern.
    preprocess_regexps = [
        # With DOTALL the greedy '.*' runs to the end of the page, so
        # everything from the first "Sent Using" onwards is cut.
        (re.compile(r'Sent Using.*', re.DOTALL | re.IGNORECASE),
         lambda match: ''),
        (re.compile(r'Kirim Komentar Anda', re.DOTALL | re.IGNORECASE),
         lambda match: ''),
        # Remove the whole "back to topic index" link. The earlier pattern
        # (']*>Kembali ...') only consumed the '>' closing the opening tag
        # plus the link text, leaving a malformed '<a ...' fragment behind.
        (re.compile(r'<a[^>]*>Kembali ke Index Topik Pilihan</a>',
                    re.DOTALL | re.IGNORECASE),
         lambda match: ''),
    ]

    # (section title, RSS URL) pairs.
    feeds = [
        (u'Nasional', u'http://www.kompas.com/getrss/nasional'),
        (u'Regional', u'http://www.kompas.com/getrss/regional'),
        (u'Internasional', u'http://www.kompas.com/getrss/internasional'),
        (u'Megapolitan', u'http://www.kompas.com/getrss/megapolitan'),
        (u'Bisnis Keuangan', u'http://www.kompas.com/getrss/bisniskeuangan'),
        (u'Kesehatan', u'http://www.kompas.com/getrss/kesehatan'),
        (u'Olahraga', u'http://www.kompas.com/getrss/olahraga'),
    ]