calibre/recipes/the_manila_bulletin.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

80 lines
3.0 KiB
Plaintext

import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheManilaBulletin(BasicNewsRecipe):
    """Recipe that builds an issue of The Manila Bulletin from its RSS feeds.

    The class is purely declarative: it only sets recipe attributes (metadata,
    download limits, tag filters, conversion options and the feed list).
    """

    # --- Publication metadata -------------------------------------------
    title = u'The Manila Bulletin'
    # Timestamp is taken once, when the class body is executed.
    custom_title = (
        "The Manila Bulletin - " + time.strftime('%d %b %Y %I:%M %p')
    )
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = (
        "The Manila Bulletin, (also known as the Bulletin and previously "
        "known as the Manila Daily Bulletin and the Bulletin Today) is the "
        "Philippines' largest broadsheet newspaper by circulation."
    )
    language = 'en_PH'
    publisher = 'The Manila Bulletin'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'

    # The same logo image serves as both cover and masthead.
    cover_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'
    masthead_url = cover_url

    # --- Download settings ----------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    recursions = 0
    encoding = None
    needs_subscription = False
    use_embedded_content = False
    remove_empty_feeds = True

    # --- Page clean-up --------------------------------------------------
    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = True

    # Elements to keep from each article page.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'article node'}),
        dict(name='div', attrs={'class': 'label'}),
        dict(name='div', attrs={'class': 'content clear-block'}),
    ]

    # Elements to strip: print/mail links, the sidebar and attachments.
    remove_tags = [
        dict(name='li', attrs={'class': 'print_html'}),
        dict(name='li', attrs={'class': 'print_html first'}),
        dict(name='li', attrs={'class': 'print_mail'}),
        dict(name='li', attrs={'class': 'print_mail last'}),
        dict(name='div', attrs={'class': 'article-sidebar'}),
        dict(name='table', attrs={'id': 'attachments'}),
    ]

    # --- Conversion options ---------------------------------------------
    # Built from the metadata above; the publisher doubles as the author.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (section title, RSS feed URL) ---------------------------
    feeds = [
        (u'Regional', u'http://www.mb.com.ph/feed/news/regional'),
        (u'Main News', u'http://www.mb.com.ph/feed/news/main'),
        (u'Business', u'http://www.mb.com.ph/feed/business'),
        (u'Sports', u'http://www.mb.com.ph/feed/sports'),
        (u'Entertainment', u'http://www.mb.com.ph/feed/entertainment'),
        (u'Opinion', u'http://www.mb.com.ph/feed/news/opinion'),
        (u'Agriculture', u'http://www.mb.com.ph/feed/news/agriculture'),
        (u'Technology', u'http://www.mb.com.ph/feed/lifestyle/technology'),
        (u'Lifestyle', u'http://www.mb.com.ph/feed/lifestyle'),
        (u'Drive', u'http://www.mb.com.ph/feed/lifestyle/drive'),
    ]

    # Disabled: switch to the print version of each article by rewriting
    # its URL, e.g.
    #   http://www.mb.com.ph/articles/361252/higher-power-rate-looms
    # becomes
    #   http://www.mb.com.ph/print/361252
    #
    # NOTE(review): for the example URL above, segments[5] is the slug and
    # the numeric id is segments[4]; '/'.join() on a single string would
    # also insert '/' between its characters. Verify before re-enabling.
    #
    # def print_version(self, url):
    #     segments = url.split('/')
    #     printURL = '/'.join(segments[0:3]) + '/print/' + '/'.join(segments[5])
    #     return printURL