import time

from calibre.web.feeds.recipes import BasicNewsRecipe


class TheManilaBulletin(BasicNewsRecipe):
    """Recipe configuration for downloading The Manila Bulletin (mb.com.ph).

    The class is purely declarative: it defines metadata, fetch limits,
    HTML filtering rules and the list of RSS feeds; it overrides no methods.
    """

    # --- Recipe bookkeeping -------------------------------------------------
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'

    # --- Publication metadata -----------------------------------------------
    title = u'The Manila Bulletin'
    # Title stamped with the local time at which this class body is evaluated.
    custom_title = "The Manila Bulletin - " + \
        time.strftime('%d %b %Y %I:%M %p')
    description = "The Manila Bulletin, (also known as the Bulletin and previously known as the Manila Daily Bulletin and the Bulletin Today) is the Philippines' largest broadsheet newspaper by circulation."  # noqa
    publisher = 'The Manila Bulletin'
    language = 'en_PH'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'

    # The same logo image serves as both masthead and cover.
    masthead_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'
    cover_url = masthead_url

    # --- Fetch limits ---------------------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    recursions = 0
    needs_subscription = False

    # --- Content handling ---------------------------------------------------
    encoding = None
    use_embedded_content = False
    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True

    # Elements of the article page that are retained.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'article node'}},
        {'name': 'div', 'attrs': {'class': 'label'}},
        {'name': 'div', 'attrs': {'class': 'content clear-block'}},
    ]

    # Elements stripped from the page: print/mail links, the article
    # sidebar and the attachments table.
    remove_tags = [
        {'name': 'li', 'attrs': {'class': 'print_html'}},
        {'name': 'li', 'attrs': {'class': 'print_html first'}},
        {'name': 'li', 'attrs': {'class': 'print_mail'}},
        {'name': 'li', 'attrs': {'class': 'print_mail last'}},
        {'name': 'div', 'attrs': {'class': 'article-sidebar'}},
        {'name': 'table', 'attrs': {'id': 'attachments'}},
    ]

    # Metadata handed to the conversion step; built from the values above.
    conversion_options = dict(
        title=custom_title,
        comments=description,
        tags=tags,
        language=language,
        publisher=publisher,
        authors=publisher,
        smarten_punctuation=True,
    )

    # (section title, feed URL) pairs.
    feeds = [
        # Disabled feed:
        # (u'Regional', u'http://www.mb.com.ph/feed/news/regional'),
        (u'Main News', u'http://www.mb.com.ph/feed/news/main'),
        (u'Business', u'http://www.mb.com.ph/feed/business'),
        (u'Sports', u'http://www.mb.com.ph/feed/sports'),
        (u'Entertainment', u'http://www.mb.com.ph/feed/entertainment'),
        (u'Opinion', u'http://www.mb.com.ph/feed/news/opinion'),
        (u'Agriculture', u'http://www.mb.com.ph/feed/news/agriculture'),
        (u'Technology', u'http://www.mb.com.ph/feed/lifestyle/technology'),
        (u'Lifestyle', u'http://www.mb.com.ph/feed/lifestyle'),
        (u'Drive', u'http://www.mb.com.ph/feed/lifestyle/drive'),
    ]

    # Optional print-version support (currently disabled). The intended
    # mapping, per the example URLs below, is:
    #   http://www.mb.com.ph/articles/361252/higher-power-rate-looms
    #   -> http://www.mb.com.ph/print/361252
    #
    # def print_version(self,url):
    #     segments = url.split('/')
    #     printURL = '/'.join(segments[0:3]) + '/print/' + '/'.join(segments[5])
    #     return printURL
    #
    # NOTE(review): as written, the disabled helper would not produce the URL
    # shown above. For the example article URL, segments[5] is the slug
    # ('higher-power-rate-looms') and '/'.join() on a string puts a slash
    # between every character; the numeric id is segments[4]. Fix before
    # re-enabling.