# Mirror of https://github.com/kovidgoyal/calibre.git
# (synced 2025-06-23 15:30:45 -04:00)
# 80 lines, 3.0 KiB, plaintext
import time
|
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
|
|
|
|
class TheManilaBulletin(BasicNewsRecipe):
    """Recipe settings for downloading The Manila Bulletin (mb.com.ph) via RSS.

    The class is purely declarative: it sets recipe attributes (metadata,
    download limits, tag filters, conversion options and the feed list) and
    defines no methods of its own.
    """

    # --- Publication metadata -------------------------------------------
    title = 'The Manila Bulletin'
    # Title stamped with the local time at which this class body is executed.
    custom_title = (
        'The Manila Bulletin - ' + time.strftime('%d %b %Y %I:%M %p')
    )
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = (
        "The Manila Bulletin, (also known as the Bulletin and previously "
        "known as the Manila Daily Bulletin and the Bulletin Today) is the "
        "Philippines' largest broadsheet newspaper by circulation."
    )
    language = 'en_PH'
    publisher = 'The Manila Bulletin'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'
    # The masthead uses the very same logo image as the cover.
    masthead_url = cover_url

    # --- Download behaviour ---------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True

    # --- Page clean-up --------------------------------------------------
    # Tag selectors (by element name and class) listed as content to keep.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'article node'}},
        {'name': 'div', 'attrs': {'class': 'label'}},
        {'name': 'div', 'attrs': {'class': 'content clear-block'}},
    ]

    # Tag selectors listed for removal: print/mail list items, the article
    # sidebar and the attachments table.
    remove_tags = [
        {'name': 'li', 'attrs': {'class': 'print_html'}},
        {'name': 'li', 'attrs': {'class': 'print_html first'}},
        {'name': 'li', 'attrs': {'class': 'print_mail'}},
        {'name': 'li', 'attrs': {'class': 'print_mail last'}},
        {'name': 'div', 'attrs': {'class': 'article-sidebar'}},
        {'name': 'table', 'attrs': {'id': 'attachments'}},
    ]

    auto_cleanup = False

    # --- Conversion -----------------------------------------------------
    # Reuses the metadata attributes defined above, so it must stay below them.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (section title, RSS URL) pairs ----------------------------
    feeds = [
        ('Regional', 'http://www.mb.com.ph/feed/news/regional'),
        ('Main News', 'http://www.mb.com.ph/feed/news/main'),
        ('Business', 'http://www.mb.com.ph/feed/business'),
        ('Sports', 'http://www.mb.com.ph/feed/sports'),
        ('Entertainment', 'http://www.mb.com.ph/feed/entertainment'),
        ('Opinion', 'http://www.mb.com.ph/feed/news/opinion'),
        ('Agriculture', 'http://www.mb.com.ph/feed/news/agriculture'),
        ('Technology', 'http://www.mb.com.ph/feed/lifestyle/technology'),
        ('Lifestyle', 'http://www.mb.com.ph/feed/lifestyle'),
        ('Drive', 'http://www.mb.com.ph/feed/lifestyle/drive'),
    ]

    # Disabled sketch: switch to the print version by converting the URL, e.g.
    #   http://www.mb.com.ph/articles/361252/higher-power-rate-looms
    #   -> http://www.mb.com.ph/print/361252
    #
    # NOTE(review): as written, '/'.join(segments[5]) would join the individual
    # characters of the slug; for the example URL the article id is
    # segments[4]. Fix before enabling.
    #
    # def print_version(self,url):
    #     segments = url.split('/')
    #     printURL = '/'.join(segments[0:3]) + '/print/' + '/'.join(segments[5])
    #     return printURL