calibre/recipes/philippino_star_ngayon.recipe
Kovid Goyal 93b9612097 ...
2012-06-08 10:21:55 +05:30

74 lines
4.0 KiB
Plaintext

'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class PilipinoStarNgayon(BasicNewsRecipe):
title = 'Pilipino Star Ngayon'
custom_title = "Pilipino Star Ngayon - " + time.strftime('%d %b %Y %I:%M %p')
__author__ = 'jde'
__date__ = '31 May 2012'
__version__ = '1.0'
description = 'A daily Tabloid written in Tagalog, distributed in the Philippines. A tabloid style newspaper published in the national language - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'
language = 'tgl'
publisher = 'The Philippine STAR'
category = 'news, Philippines'
tags = 'news, Philippines'
cover_url = 'http://www.philstar.com/images/logo_PSN.jpg'
masthead_url = 'http://www.philstar.com/images/logo_PSN.jpg'
oldest_article = 1.5 #days
max_articles_per_feed = 25
simultaneous_downloads = 10
publication_type = 'newspaper'
timefmt = ' [%a, %d %b %Y %I:%M %p]'
no_stylesheets = True
use_embedded_content = False
encoding = None
recursions = 0
needs_subscription = False
remove_javascript = True
remove_empty_feeds = True
auto_cleanup = False
remove_tags = [dict(name='img', attrs={'id':'Image1'}) #Logo
,dict(name='span', attrs={'id':'ControlArticle1_LabelHeader'}) #Section (Headlines, Nation, Metro, ...)
,dict(name='a', attrs={'id':'ControlArticle1_FormView1_hlComments'}) #Comments
,dict(name='img', attrs={'src':'images/post-comments.jpg'}) #View Comments
,dict(name='a', attrs={'id':'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}) #Zoom
]
conversion_options = { 'title' : custom_title,
'comments' : description,
'tags' : tags,
'language' : language,
'publisher' : publisher,
'authors' : publisher,
'smarten_punctuation' : True
}
feeds = [
('Litra-talk' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=535' )
,('Bansa' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=92' )
,('Probinsiya' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=49' )
,('Metro' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=93' )
,('Opinyon' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=94' )
,('Palaro' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=95' )
,('Showbiz' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=96' )
,('True Confessions' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=97' )
,('Dr. Love' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=98' )
,('Kutob' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=99' )
,('Komiks' , 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=100' )
]
# process the printer friendly version of article
def print_version(self, url):
return url.replace('/Article', '/ArticlePrinterFriendly')
# obtain title from printer friendly version of article; avoiding add_toc_thumbnail changing title when article has image
def populate_article_metadata(self, article, soup, first):
article.title = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'}).contents[0].strip()