# calibre/recipes/philippino_star_ngayon.recipe


'''
www.philstar.com
'''

import time
from calibre.web.feeds.recipes import BasicNewsRecipe


class PilipinoStarNgayon(BasicNewsRecipe):
    """Recipe for Pilipino Star Ngayon (www.philstar.com).

    Articles are taken from the section RSS feeds listed in ``feeds`` and
    downloaded through their printer-friendly pages (see ``print_version``).
    """

    title = 'Pilipino Star Ngayon'
    # Title written into the book metadata; the timestamp is evaluated once,
    # when the class body is executed.
    custom_title = "Pilipino Star Ngayon - " + \
        time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'A daily Tabloid written in Tagalog, distributed in the Philippines. A tabloid style newspaper published in the national language - philstar.com is a Philippine news and entertainment portal for the Filipino global community.   It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'  # noqa
    language = 'tgl'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/logo_PSN.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_PSN.jpg'
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # Page elements stripped from every downloaded article.
    remove_tags = [
        # Logo
        dict(name='img', attrs={'id': 'Image1'}),
        # Section (Headlines, Nation, Metro, ...)
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),
        # Comments
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),
        # View Comments
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),
        # Zoom
        dict(name='a', attrs={
            'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),  # noqa
    ]

    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # (section name, RSS feed URL) pairs.
    feeds = [
        ('Litra-talk', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=535'),
        ('Bansa', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=92'),
        ('Probinsiya', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=49'),
        ('Metro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=93'),
        ('Opinyon', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=94'),
        ('Palaro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=95'),
        ('Showbiz', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=96'),
        ('True Confessions', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=97'),
        ('Dr. Love', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=98'),
        ('Kutob', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=99'),
        ('Komiks', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=100'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at ``url``.

        Every ``/Article`` in ``url`` is rewritten to
        ``/ArticlePrinterFriendly``.
        """
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        """Set ``article.title`` from the printer-friendly page headline.

        The title is read from the page itself, avoiding add_toc_thumbnail
        changing the title when the article has an image.

        If the headline span is missing, empty, does not start with text, or
        holds only whitespace, ``article.title`` is left untouched rather
        than raising, so one malformed page does not fail the article.
        """
        header = soup.find(
            'span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        if header is None or not header.contents:
            return
        first_child = header.contents[0]
        # The first child may be a nested tag instead of text; only a text
        # node can be stripped and used as the title.
        if not isinstance(first_child, str):
            return
        headline = first_child.strip()
        if headline:
            article.title = headline