# calibre/recipes/the_manila_bulletin.recipe

import time
from calibre.web.feeds.recipes import BasicNewsRecipe

class TheManilaBulletin(BasicNewsRecipe):
    """Calibre news recipe for The Manila Bulletin (www.mb.com.ph).

    The recipe is purely declarative: feed URLs, download limits, HTML
    clean-up rules and e-book metadata are all set as class attributes,
    and no methods are defined.
    """

    # ---- Identity and metadata ------------------------------------------
    title = u'The Manila Bulletin'
    # Built once, when this class body is executed, so the timestamp
    # reflects the moment the recipe was loaded.
    custom_title = 'The Manila Bulletin - ' + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = (
        "The Manila Bulletin, (also known as the Bulletin and previously "
        "known as the Manila Daily Bulletin and the Bulletin Today) is the "
        "Philippines' largest broadsheet newspaper by circulation."
    )
    language = 'en_PH'
    publisher = 'The Manila Bulletin'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    publication_type = 'newspaper'

    # The same logo image serves as both the cover and the masthead.
    cover_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'
    masthead_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'

    # ---- Download behaviour ---------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_empty_feeds = True

    # ---- HTML clean-up --------------------------------------------------
    no_stylesheets = True
    remove_javascript = True
    # Clean-up is done by the explicit tag rules below, not automatically.
    auto_cleanup = False

    # Elements of the article page that are kept.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'article node'}},
        {'name': 'div', 'attrs': {'class': 'label'}},
        {'name': 'div', 'attrs': {'class': 'content clear-block'}},
    ]

    # Print/mail list items, the sidebar and the attachments table are
    # stripped from what was kept.
    remove_tags = [
        {'name': 'li', 'attrs': {'class': 'print_html'}},
        {'name': 'li', 'attrs': {'class': 'print_html first'}},
        {'name': 'li', 'attrs': {'class': 'print_mail'}},
        {'name': 'li', 'attrs': {'class': 'print_mail last'}},
        {'name': 'div', 'attrs': {'class': 'article-sidebar'}},
        {'name': 'table', 'attrs': {'id': 'attachments'}},
    ]

    # ---- Conversion metadata --------------------------------------------
    # Reuses the attributes defined above; the publisher doubles as author.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # ---- Feeds ----------------------------------------------------------
    # (section title, feed URL) pairs. Commented-out entries are feeds
    # that are currently disabled; uncomment a line to include that feed.
    feeds = [
        (u'Main News', u'http://www.mb.com.ph/feed/news/main'),
        # (u'Regional', u'http://www.mb.com.ph/feed/news/regional'),
        (u'Business', u'http://www.mb.com.ph/feed/business'),
        (u'Sports', u'http://www.mb.com.ph/feed/sports'),
        (u'Entertainment', u'http://www.mb.com.ph/feed/entertainment'),
        (u'Opinion', u'http://www.mb.com.ph/feed/news/opinion'),
        # (u'Agriculture', u'http://www.mb.com.ph/feed/news/agriculture'),
        # (u'Environment', u'http://www.mb.com.ph/feed/news/environment'),
        (u'Technology', u'http://www.mb.com.ph/feed/lifestyle/technology'),
        (u'Lifestyle', u'http://www.mb.com.ph/feed/lifestyle'),
        # (u'Arts & Living', u'http://www.mb.com.ph/feed/lifestyle/arts-and-living'),
        # (u'Drive', u'http://www.mb.com.ph/feed/lifestyle/drive'),
        # (u'Food', u'http://www.mb.com.ph/feed/lifestyle/food'),
        # (u'Travel', u'http://www.mb.com.ph/feed/lifestyle/travel'),
        # (u'Picture Perfect', u'http://www.mb.com.ph/feed/lifestyle/picture-perfect'),
    ]


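#  To use the print version of an article instead, convert the article URL
#  to its print URL, e.g.
#    http://www.mb.com.ph/articles/361252/higher-power-rate-looms
#    -> http://www.mb.com.ph/print/361252
#  Disabled; uncomment the method below to enable it.
#
#    def print_version(self,url):
#        segments = url.split('/')
#        # segments[4] is the numeric article id ('361252' in the example above)
#        printURL = '/'.join(segments[0:3]) + '/print/' + segments[4]
#        return printURL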