# calibre/recipes/banat_news.recipe
# 2012-06-07 13:06:08 +05:30
#
# 69 lines
# 3.4 KiB
# Plaintext

'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class BanatNews(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from the Banat News RSS feeds.

    Articles are downloaded from their printer-friendly pages (see
    ``print_version``) and every article title is re-read from the
    downloaded page (see ``populate_article_metadata``).
    """

    # --- Publication metadata ---
    title = 'Banat News'
    # Timestamped title handed to the converter through conversion_options.
    # The timestamp is taken once, when this class body is executed.
    custom_title = "Banat News - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'Banat News is a daily Cebuano-language newspaper based in Cebu, Philippines - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'
    language = 'ceb'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/logo_Banat.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_Banat.jpg'

    # --- Download settings ---
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # Page elements stripped from every downloaded article.
    remove_tags = [
        dict(name='img', attrs={'id': 'Image1'}),  # Logo
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),  # Section (Headlines, Nation, Metro, ...)
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),  # Comments
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),  # View Comments
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),  # Zoom
    ]

    # Metadata overrides for the generated book, built from the values above.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # (section title, RSS feed URL) pairs, in the order they are listed.
    feeds = [
        ('Balita', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=101'),
        ('Opinyon', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=102'),
        ('Kalingawan', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=104'),
        ('Showbiz', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=62'),
        ('Palaro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=103'),
        ('Imong Kapalaran', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=105'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for an article.

        Every occurrence of '/Article' in ``url`` is replaced with
        '/ArticlePrinterFriendly'; a URL without '/Article' is returned
        unchanged.
        """
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        """Set ``article.title`` from the header span of the downloaded page.

        Per the original author's note, the title is taken from the
        printer-friendly page to avoid add_toc_thumbnail changing the title
        when the article has an image.

        ``article`` is the article object whose ``title`` is overwritten,
        ``soup`` is the parsed page that is searched for the header span, and
        ``first`` is accepted for interface compatibility but not used.

        If the header span is missing, is empty, or does not start with
        non-blank text, the existing ``article.title`` is left untouched
        instead of raising.
        """
        header = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        if header is None or not header.contents:
            # No usable header on this page: keep the title already set.
            return

        try:
            heading = header.contents[0].strip()
        except (AttributeError, TypeError):
            # The first child is not plain text (presumably nested markup),
            # so it offers no usable strip(); keep the existing title.
            return

        if heading:
            article.title = heading