# calibre/recipes/the_manila_bulletin.recipe
# 2012-06-07 13:06:08 +05:30
#
# 89 lines
# 3.6 KiB
# Plaintext

import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheManilaBulletin(BasicNewsRecipe):
    """Recipe definition for The Manila Bulletin (www.mb.com.ph) RSS feeds.

    The class is declarative only: it sets class attributes that the
    BasicNewsRecipe base class is expected to consume and defines no
    methods of its own.
    """

    # ---- Recipe bookkeeping -------------------------------------------
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'

    # ---- Publication metadata -----------------------------------------
    title = u'The Manila Bulletin'
    # time.strftime() runs once, while the class body is executed, so the
    # timestamp reflects when the recipe was loaded.
    custom_title = "The Manila Bulletin - " + time.strftime('%d %b %Y %I:%M %p')
    description = (
        "The Manila Bulletin, (also known as the Bulletin and previously "
        "known as the Manila Daily Bulletin and the Bulletin Today) is the "
        "Philippines' largest broadsheet newspaper by circulation."
    )
    publisher = 'The Manila Bulletin'
    language = 'en_PH'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'

    # The same logo image is used for both the cover and the masthead.
    cover_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'
    masthead_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'

    # ---- Download settings --------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    recursions = 0
    needs_subscription = False
    use_embedded_content = False
    encoding = None
    remove_empty_feeds = True

    # ---- Page clean-up ------------------------------------------------
    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = True

    # <div> elements, selected by class, listed as the ones to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'article node'}},
        {'name': 'div', 'attrs': {'class': 'label'}},
        {'name': 'div', 'attrs': {'class': 'content clear-block'}},
    ]

    # Print/mail list items, the article sidebar and the attachments
    # table, listed as elements to remove.
    remove_tags = [
        {'name': 'li', 'attrs': {'class': 'print_html'}},
        {'name': 'li', 'attrs': {'class': 'print_html first'}},
        {'name': 'li', 'attrs': {'class': 'print_mail'}},
        {'name': 'li', 'attrs': {'class': 'print_mail last'}},
        {'name': 'div', 'attrs': {'class': 'article-sidebar'}},
        {'name': 'table', 'attrs': {'id': 'attachments'}},
    ]

    # ---- Conversion options -------------------------------------------
    # Built from the attributes above, so this must stay below them.
    conversion_options = dict(
        title=custom_title,
        comments=description,
        tags=tags,
        language=language,
        publisher=publisher,
        authors=publisher,
        smarten_punctuation=True,
    )

    # ---- Feeds --------------------------------------------------------
    # (section title, feed URL) pairs. Commented-out entries are
    # additional sections that are currently disabled.
    feeds = [
        (u'Main News', u'http://www.mb.com.ph/feed/news/main'),
        # (u'Regional', u'http://www.mb.com.ph/feed/news/regional'),
        (u'Business', u'http://www.mb.com.ph/feed/business'),
        (u'Sports', u'http://www.mb.com.ph/feed/sports'),
        (u'Entertainment', u'http://www.mb.com.ph/feed/entertainment'),
        (u'Opinion', u'http://www.mb.com.ph/feed/news/opinion'),
        # (u'Agriculture', u'http://www.mb.com.ph/feed/news/agriculture'),
        # (u'Environment', u'http://www.mb.com.ph/feed/news/environment'),
        (u'Technology', u'http://www.mb.com.ph/feed/lifestyle/technology'),
        (u'Lifestyle', u'http://www.mb.com.ph/feed/lifestyle'),
        # (u'Arts & Living', u'http://www.mb.com.ph/feed/lifestyle/arts-and-living'),
        # (u'Drive', u'http://www.mb.com.ph/feed/lifestyle/drive'),
        # (u'Food', u'http://www.mb.com.ph/feed/lifestyle/food'),
        # (u'Travel', u'http://www.mb.com.ph/feed/lifestyle/travel'),
        # (u'Picture Perfect', u'http://www.mb.com.ph/feed/lifestyle/picture-perfect'),
    ]

    # ---- Print version (disabled) -------------------------------------
    # If the print version is wanted, the article URL has to be converted:
    #   http://www.mb.com.ph/articles/361252/higher-power-rate-looms
    #   -> http://www.mb.com.ph/print/361252
    #
    # def print_version(self, url):
    #     segments = url.split('/')
    #     printURL = '/'.join(segments[0:3]) + '/print/' + '/'.join(segments[5])
    #     return printURL
    #
    # NOTE(review): for the example URL above, url.split('/') puts the
    # numeric article id at segments[4]; segments[5] is the headline slug,
    # and '/'.join() on a single string inserts '/' between its characters.
    # Verify and correct the index before enabling this method.