Various Philippine news sources by jde

This commit is contained in:
Kovid Goyal 2012-06-07 13:06:08 +05:30
parent 3212986d45
commit fc76cae3fa
10 changed files with 709 additions and 0 deletions

68
recipes/banat_news.recipe Normal file
View File

@ -0,0 +1,68 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class BanatNews(BasicNewsRecipe):
    """Calibre recipe for Banat News, fetched through the philstar.com RSS feeds.

    Articles are downloaded in their printer-friendly form (see
    ``print_version``) and each article title is read back from that page
    (see ``populate_article_metadata``).
    """

    # --- Publication identity and e-book metadata -------------------------
    title = 'Banat News'
    # Evaluated once, when the class body is executed.
    custom_title = "Banat News - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'Banat News is a daily Cebuano-language newspaper based in Cebu, Philippines - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'
    language = 'ceb'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/logo_Banat.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_Banat.jpg'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # --- Page elements stripped from every article ------------------------
    remove_tags = [
        dict(name='img', attrs={'id': 'Image1'}),  # Logo
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),  # Section (Headlines, Nation, Metro, ...)
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),  # Comments
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),  # View Comments
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),  # Zoom
    ]

    # Built from the attributes above, so it must stay below them.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (section title, RSS URL) ----------------------------------
    feeds = [
        ('Balita', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=101'),
        ('Opinyon', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=102'),
        ('Kalingawan', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=104'),
        ('Showbiz', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=62'),
        ('Palaro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=103'),
        ('Imong Kapalaran', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=105'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at *url*.

        Every ``/Article`` substring in *url* is replaced with
        ``/ArticlePrinterFriendly``.
        """
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        """Set ``article.title`` from the header span of the downloaded page.

        The title is read from the printer-friendly page so that it is not
        altered by add_toc_thumbnail when the article has an image.

        :param article: object whose ``title`` attribute is assigned.
        :param soup: parsed article page, searched with ``soup.find``.
        :param first: unused here.

        If the header span is absent or has no children, the title already
        on *article* is left untouched instead of raising.
        """
        header = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        # soup.find yields None when nothing matches; an empty span has no
        # contents[0]. In both cases keep the title the article already has.
        if header is None or not header.contents:
            return
        article.title = header.contents[0].strip()

View File

@ -0,0 +1,74 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
import time
class MalayaBusinessInsight(BasicNewsRecipe):
    """Calibre recipe for Malaya Business Insight (malaya.com.ph RSS feeds).

    Purely declarative: metadata, fetch settings, HTML clean-up rules and
    the list of section feeds.
    """

    # --- Publication identity and e-book metadata -------------------------
    title = u'Malaya Business Insight'
    # Evaluated once, when the class body is executed.
    custom_title = "Malaya Business Insight - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = "The Malaya Business Insight is a broadsheet newspaper in the Philippines. The newspaper's name was derived from the Filipino word that means 'freedom'."
    language = 'en_PH'
    publisher = 'Malaya Business Insight'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.malaya.com.ph/templates/ja_teline_iv/images/logo.png'
    masthead_url = 'http://www.malaya.com.ph/templates/ja_teline_iv/images/logo.png'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # --- HTML clean-up ----------------------------------------------------
    keep_only_tags = [
        dict(name='div', attrs={'id': 'ja-main'}),
    ]
    remove_tags = [
        dict(name='a', attrs={'class': 'ja-back-btn'}),
        dict(name='li', attrs={'class': 'print-icon'}),
        dict(name='li', attrs={'class': 'email-icon'}),
        dict(name='p', attrs={'class': 'dnn'}),
        dict(name='span', attrs={'class': 'breadcrumbs pathway'}),
        dict(name='dt', attrs={'class': 'article-info-term'}),
    ]

    # Built from the attributes above, so it must stay below them.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (section title, RSS URL) ----------------------------------
    feeds = [
        (u'Business', u'http://www.malaya.com.ph/index.php/business?format=feed&type=rss'),
        (u'Market', u'http://www.malaya.com.ph/index.php/business/market?format=feed&type=rss'),
        (u'Shipping and Transportation', u'http://www.malaya.com.ph/index.php/business/shipping-and-transportation?format=feed&type=rss'),
        (u'Business Incidental', u'http://www.malaya.com.ph/index.php/business/business-incidental?format=feed&type=rss'),
        (u'Banking and Finance', u'http://www.malaya.com.ph/index.php/special-features/banking-and-finance?format=feed&type=rss'),
        (u'Motoring', u'http://www.malaya.com.ph/index.php/special-features/motoring?format=feed&type=rss'),
        (u'Info Tech - Telecoms', u'http://www.malaya.com.ph/index.php/special-features/infotech-telecoms?format=feed&type=rss'),
        (u'Property', u'http://www.malaya.com.ph/index.php/special-features/property?format=feed&type=rss'),
        (u'Environment', u'http://www.malaya.com.ph/index.php/special-features/environment?format=feed&type=rss'),
        (u'Agriculture', u'http://www.malaya.com.ph/index.php/special-features/agriculture?format=feed&type=rss'),
        (u'News - National', u'http://www.malaya.com.ph/index.php/news/nation?format=feed&type=rss'),
        (u'News - International', u'http://www.malaya.com.ph/index.php/news/international?format=feed&type=rss'),
        (u'Sports', u'http://www.malaya.com.ph/index.php/sports?format=feed&type=rss'),
        (u'Entertainment', u'http://www.malaya.com.ph/index.php/entertainment?format=feed&type=rss'),
        (u'Living', u'http://www.malaya.com.ph/index.php/living?format=feed&type=rss'),
        (u'Opinion', u'http://www.malaya.com.ph/index.php/opinion?format=feed&type=rss'),
    ]

View File

@ -0,0 +1,54 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
import time
class ManilaStandardToday(BasicNewsRecipe):
    """Calibre recipe for Manila Standard Today (manilastandardtoday.com feeds).

    Purely declarative: metadata, fetch settings, the single container kept
    from each page, and the list of section feeds.
    """

    # --- Publication identity and e-book metadata -------------------------
    title = u'Manila Standard Today'
    # Evaluated once, when the class body is executed.
    custom_title = "Manila Standard Today - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = 'The Manila Standard Today is the fourth-largest broadsheet newspaper in the Philippines as of 2006. Initially established as the Manila Standard, it merged with another newspaper of record, Today, on March 6, 2005. It was the first newspaper merger in the Philippines.'
    language = 'en_PH'
    publisher = 'Manila Standard Today'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    # NOTE(review): the file name embeds a specific date (June-06-12), so
    # this looks like one day's front page rather than a stable logo - confirm.
    cover_url = 'http://www.manilastandardtoday.com/wp-content/uploads/Manila-Standard-Today-June-06-12.jpg'
    masthead_url = 'http://www.manilastandardtoday.com/wp-content/uploads/Manila-Standard-Today-June-06-12.jpg'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # --- HTML clean-up ----------------------------------------------------
    keep_only_tags = [
        dict(name='div', attrs={'id': 'main'}),
    ]

    # Built from the attributes above, so it must stay below them.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (section title, RSS URL) ----------------------------------
    feeds = [
        (u'Headlines', u'http://news.manilastandardtoday.com/feed/'),
        (u'Nation', u'http://news.manilastandardtoday.com/archives/nation/feed/'),
        (u'Business', u'http://business.manilastandardtoday.com/feed/'),
        (u'Metro', u'http://news.manilastandardtoday.com/archives/metro/feed/'),
        (u'Sports', u'http://sports.manilastandardtoday.com/feed/'),
        (u'Entertainment', u'http://entertainment.manilastandardtoday.com/feed/'),
        (u'Opinion', u'http://opinion.manilastandardtoday.com/feed/'),
        (u'Lifestyle', u'http://lifestyle.manilastandardtoday.com/feed/'),
    ]

View File

@ -0,0 +1,73 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class PilipinoStarNgayon(BasicNewsRecipe):
    """Calibre recipe for Pilipino Star Ngayon, fetched through the philstar.com RSS feeds.

    Articles are downloaded in their printer-friendly form (see
    ``print_version``) and each article title is read back from that page
    (see ``populate_article_metadata``).
    """

    # --- Publication identity and e-book metadata -------------------------
    title = 'Pilipino Star Ngayon'
    # Evaluated once, when the class body is executed.
    custom_title = "Pilipino Star Ngayon - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'A daily Tabloid written in Tagalog, distributed in the Philippines. A tabloid style newspaper published in the national language - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'
    language = 'tgl'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/logo_PSN.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_PSN.jpg'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # --- Page elements stripped from every article ------------------------
    remove_tags = [
        dict(name='img', attrs={'id': 'Image1'}),  # Logo
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),  # Section (Headlines, Nation, Metro, ...)
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),  # Comments
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),  # View Comments
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),  # Zoom
    ]

    # Built from the attributes above, so it must stay below them.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (section title, RSS URL) ----------------------------------
    feeds = [
        ('Litra-talk', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=535'),
        ('Bansa', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=92'),
        ('Probinsiya', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=49'),
        ('Metro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=93'),
        ('Opinyon', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=94'),
        ('Palaro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=95'),
        ('Showbiz', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=96'),
        ('True Confessions', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=97'),
        ('Dr. Love', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=98'),
        ('Kutob', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=99'),
        ('Komiks', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=100'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at *url*.

        Every ``/Article`` substring in *url* is replaced with
        ``/ArticlePrinterFriendly``.
        """
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        """Set ``article.title`` from the header span of the downloaded page.

        The title is read from the printer-friendly page so that it is not
        altered by add_toc_thumbnail when the article has an image.

        :param article: object whose ``title`` attribute is assigned.
        :param soup: parsed article page, searched with ``soup.find``.
        :param first: unused here.

        If the header span is absent or has no children, the title already
        on *article* is left untouched instead of raising.
        """
        header = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        # soup.find yields None when nothing matches; an empty span has no
        # contents[0]. In both cases keep the title the article already has.
        if header is None or not header.contents:
            return
        article.title = header.contents[0].strip()

View File

@ -0,0 +1,70 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class Freeman(BasicNewsRecipe):
    """Calibre recipe for The Freeman, fetched through the philstar.com RSS feeds.

    Articles are downloaded in their printer-friendly form (see
    ``print_version``) and each article title is read back from that page
    (see ``populate_article_metadata``).
    """

    # --- Publication identity and e-book metadata -------------------------
    title = 'The Freeman'
    # Evaluated once, when the class body is executed.
    custom_title = "The Freeman - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'The Freeman is a daily English-language newspaper published in Cebu, Philippines, by the Philippine Star. It was the first newspaper in Cebu, first published in May 1919. The motto of the newspaper is "The fair and fearless" - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'
    language = 'en_PH'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/logo_Freeman.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_Freeman.jpg'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # --- Page elements stripped from every article ------------------------
    remove_tags = [
        dict(name='img', attrs={'id': 'Image1'}),  # Logo
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),  # Section (Headlines, Nation, Metro, ...)
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),  # Comments
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),  # View Comments
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),  # Zoom
    ]

    # Built from the attributes above, so it must stay below them.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (section title, RSS URL) ----------------------------------
    feeds = [
        ('Cebu News', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=107'),
        ('Freeman Opinion', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=109'),
        ('Metro Cebu', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=531'),
        ('Region', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=530'),
        ('Cebu Business', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=108'),
        ('Cebu Sports', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=110'),
        ('Cebu Lifestyle', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=111'),
        ('Cebu Entertainment', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=51'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at *url*.

        Every ``/Article`` substring in *url* is replaced with
        ``/ArticlePrinterFriendly``.
        """
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        """Set ``article.title`` from the header span of the downloaded page.

        The title is read from the printer-friendly page so that it is not
        altered by add_toc_thumbnail when the article has an image.

        :param article: object whose ``title`` attribute is assigned.
        :param soup: parsed article page, searched with ``soup.find``.
        :param first: unused here.

        If the header span is absent or has no children, the title already
        on *article* is left untouched instead of raising.
        """
        header = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        # soup.find yields None when nothing matches; an empty span has no
        # contents[0]. In both cases keep the title the article already has.
        if header is None or not header.contents:
            return
        article.title = header.contents[0].strip()

View File

@ -0,0 +1,88 @@
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheManilaBulletin(BasicNewsRecipe):
    """Calibre recipe for The Manila Bulletin (mb.com.ph RSS feeds).

    Purely declarative: metadata, fetch settings, HTML clean-up rules and
    the list of section feeds. Several feeds and a draft ``print_version``
    are kept as comments only.
    """

    # --- Publication identity and e-book metadata -------------------------
    title = u'The Manila Bulletin'
    # Evaluated once, when the class body is executed.
    custom_title = "The Manila Bulletin - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = "The Manila Bulletin, (also known as the Bulletin and previously known as the Manila Daily Bulletin and the Bulletin Today) is the Philippines' largest broadsheet newspaper by circulation."
    language = 'en_PH'
    publisher = 'The Manila Bulletin'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'
    masthead_url = 'http://www.mb.com.ph/sites/default/files/mb_logo.jpg'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True

    # --- HTML clean-up ----------------------------------------------------
    keep_only_tags = [
        dict(name='div', attrs={'class': 'article node'}),
        dict(name='div', attrs={'class': 'label'}),
        dict(name='div', attrs={'class': 'content clear-block'}),
    ]
    remove_tags = [
        dict(name='li', attrs={'class': 'print_html'}),
        dict(name='li', attrs={'class': 'print_html first'}),
        dict(name='li', attrs={'class': 'print_mail'}),
        dict(name='li', attrs={'class': 'print_mail last'}),
        dict(name='div', attrs={'class': 'article-sidebar'}),
        dict(name='table', attrs={'id': 'attachments'}),
    ]
    auto_cleanup = False

    # Built from the attributes above, so it must stay below them.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (section title, RSS URL) ----------------------------------
    # Entries that are commented out are not fetched.
    feeds = [
        (u'Main News', u'http://www.mb.com.ph/feed/news/main'),
        # (u'Regional', u'http://www.mb.com.ph/feed/news/regional'),
        (u'Business', u'http://www.mb.com.ph/feed/business'),
        (u'Sports', u'http://www.mb.com.ph/feed/sports'),
        (u'Entertainment', u'http://www.mb.com.ph/feed/entertainment'),
        (u'Opinion', u'http://www.mb.com.ph/feed/news/opinion'),
        # (u'Agriculture', u'http://www.mb.com.ph/feed/news/agriculture'),
        # (u'Environment', u'http://www.mb.com.ph/feed/news/environment'),
        (u'Technology', u'http://www.mb.com.ph/feed/lifestyle/technology'),
        (u'Lifestyle', u'http://www.mb.com.ph/feed/lifestyle'),
        # (u'Arts & Living', u'http://www.mb.com.ph/feed/lifestyle/arts-and-living'),
        # (u'Drive', u'http://www.mb.com.ph/feed/lifestyle/drive'),
        # (u'Food', u'http://www.mb.com.ph/feed/lifestyle/food'),
        # (u'Travel', u'http://www.mb.com.ph/feed/lifestyle/travel'),
        # (u'Picture Perfect', u'http://www.mb.com.ph/feed/lifestyle/picture-perfect'),
    ]

    # Draft (not active): to use the print version, convert the article URL
    #     http://www.mb.com.ph/articles/361252/higher-power-rate-looms
    # into
    #     http://www.mb.com.ph/print/361252
    #
    # def print_version(self,url):
    #     segments = url.split('/')
    #     printURL = '/'.join(segments[0:3]) + '/print/' + '/'.join(segments[5])
    #     return printURL
    #
    # NOTE(review): for the example URL above, segments[5] is the slug and
    # the numeric id is segments[4]; '/'.join() on a string also interleaves
    # slashes between its characters. Revisit before enabling this draft.

View File

@ -0,0 +1,55 @@
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheManilaTimes(BasicNewsRecipe):
    """Calibre recipe for The Manila Times (manilatimes.net RSS feeds).

    Purely declarative: metadata, fetch settings, HTML clean-up rules and
    the list of section feeds. ``auto_cleanup`` is switched on for this
    recipe.
    """

    # --- Publication identity and e-book metadata -------------------------
    title = u'The Manila Times'
    # Evaluated once, when the class body is executed.
    custom_title = "The Manila Times - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '06 June 2012'
    __version__ = '1.0'
    description = 'The Manila Times is the oldest existing English language newspaper in the Philippines.'
    language = 'en_PH'
    publisher = 'The Manila Times'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.manilatimes.net/images/banners/logo-mt.png'
    masthead_url = 'http://www.manilatimes.net/images/banners/logo-mt.png'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True

    # --- HTML clean-up ----------------------------------------------------
    remove_tags = [
        dict(name='img', attrs={'alt': 'Print'}),
        dict(name='img', attrs={'alt': 'Email:'}),
        dict(name='dd', attrs={'class': 'hits'}),
    ]
    auto_cleanup = True

    # Built from the attributes above, so it must stay below them.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (section title, RSS URL) ----------------------------------
    feeds = [
        (u'Breaking News', u'http://www.manilatimes.net/index.php/news/breaking-news?format=feed&type=rss'),
        (u'Top Stories', u'http://www.manilatimes.net/index.php/news/top-stories?format=feed&type=rss'),
        (u'Headlines', u'http://www.manilatimes.net/index.php/news/headlines-mt?format=feed&type=rss'),
        (u'Nation', u'http://www.manilatimes.net/index.php/news/nation?format=feed&type=rss'),
        (u'Regions', u'http://www.manilatimes.net/index.php/news/regions?format=feed&type=rss'),
        (u'World', u'http://www.manilatimes.net/index.php/news/world?format=feed&type=rss'),
        (u'Top Business News', u'http://www.manilatimes.net/index.php/business/top-business-news?format=feed&type=rss'),
        (u'Business Columnist', u'http://www.manilatimes.net/index.php/business/business-columnist?format=feed&type=rss'),
        (u'Opinion - Editorials', u'http://www.manilatimes.net/index.php/opinion/editorials?format=feed&type=rss'),
        (u'Opinion - Columnist', u'http://www.manilatimes.net/index.php/opinion/columnist1?format=feed&type=rss'),
        (u'Opinion - Editorial Cartoon', u'http://www.manilatimes.net/index.php/opinion/editorial-cartoon?format=feed&type=rss'),
        (u'Top Sports News', u'http://www.manilatimes.net/index.php/sports/top-sports-news?format=feed&type=rss'),
        (u'Sports Columnist', u'http://www.manilatimes.net/index.php/sports/sports-columnist?format=feed&type=rss'),
        (u'Life & Times', u'http://www.manilatimes.net/index.php/life-and-times?format=feed&type=rss'),
        (u'Showtime', u'http://www.manilatimes.net/index.php/life-and-times/showtime?format=feed&type=rss'),
        (u'Sunday Times', u'http://www.manilatimes.net/index.php/sunday-times?format=feed&type=rss'),
        (u'Sunday Times Magazine', u'http://www.manilatimes.net/index.php/sunday-times/the-sunday-times-magazines?format=feed&type=rss'),
        (u'Motoring News', u'http://www.manilatimes.net/index.php/fast-times/motoring-news?format=feed&type=rss'),
        (u'Motoring Columnist', u'http://www.manilatimes.net/index.php/fast-times/motoring-columnist?format=feed&type=rss'),
        (u'Technology', u'http://www.manilatimes.net/index.php/technology?format=feed&type=rss'),
    ]

View File

@ -0,0 +1,129 @@
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class PhilippineDailyInquirer(BasicNewsRecipe):
    """Calibre recipe for The Philippine Daily Inquirer (inquirer.net RSS feeds).

    Purely declarative: metadata, fetch settings, HTML clean-up rules and
    the list of section feeds, grouped below by site section.
    """

    # --- Publication identity and e-book metadata -------------------------
    title = 'The Philippine Daily Inquirer'
    # Evaluated once, when the class body is executed.
    custom_title = "The Philippine Daily Inquirer - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '03 June 2012'
    __version__ = '1.0'
    description = 'The Philippine Daily Inquirer is a widely read and circulated newspaper.'
    language = 'en_PH'
    publisher = 'The Philippine Daily Inquirer'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.inquirer.com.ph/assets/bg/logo.jpg'
    masthead_url = 'http://www.inquirer.com.ph/assets/bg/logo.jpg'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # --- HTML clean-up ----------------------------------------------------
    remove_tags_after = [
        dict(name='div', attrs={'id': 'entryMeta'}),
        dict(name='div', attrs={'id': 'taboola-div'}),
        dict(name='br', attrs={'class': 'clear'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'recent'}),
        dict(name='div', attrs={'id': 'sharefeature'}),
        dict(name='div', attrs={'id': 'masthead_bg'}),
        dict(name='div', attrs={'id': 'navmenu_main'}),
        dict(name='div', attrs={'id': 'navmenu_channel'}),
        dict(name='div', attrs={'class': 'breadcrumbs'}),
        dict(name='div', attrs={'id': 'search_container'}),
        dict(name='a', attrs={'href': 'http://ruby.inquirer.net/redirect/redirect.php?item_id=1143'}),
        dict(name='a', attrs={'href': 'http://ruby.inquirer.net/redirect/redirect.php?item_id=1147'}),
    ]

    # Built from the attributes above, so it must stay below them.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (section title, RSS URL) ----------------------------------
    # Entries that are commented out are not fetched.
    feeds = [
        # News
        ('Headlines', 'http://newsinfo.inquirer.net/category/inquirer-headlines/feed'),
        ('Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/feed'),
        ('Nation', 'http://newsinfo.inquirer.net/category/nation/feed'),
        ('Nation - Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/nation-latest-stories/feed'),
        ('Metro', 'http://newsinfo.inquirer.net/category/metro/feed'),
        ('Metro - Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/metro-latest-stories/feed'),
        ('Regions', 'http://newsinfo.inquirer.net/category/regions/feed'),
        ('Regions - Latest Stories', 'http://newsinfo.inquirer.net/category/latest-stories/regions-latest-stories/feed'),
        # ('News', 'http://www.inquirer.net/fullfeed'),
        # ('More News', 'http://newsinfo.inquirer.net/feed'),

        # Global Nation
        ('Global Nation', 'http://globalnation.inquirer.net/feed'),
        ('Global Nation - Latest Stories', 'http://globalnation.inquirer.net/category/latest-stories/feed'),
        ('Global Nation - Philippines', 'http://globalnation.inquirer.net/category/news/philippines/feed'),
        ('Global Nation - Asia & Pacific', 'http://globalnation.inquirer.net/category/news/asiaaustralia/feed'),
        ('Global Nation - Americas', 'http://globalnation.inquirer.net/category/news/uscanada/feed'),
        ('Global Nation - Middle East & Africa', 'http://globalnation.inquirer.net/category/news/middle-eastafrica/feed'),
        ('Global Nation - Europe', 'http://globalnation.inquirer.net/category/news/europe/feed'),
        ('Global Nation - Global Pinoy', 'http://globalnation.inquirer.net/category/global-pinoy/feed'),
        ('Global Nation - Events', 'http://globalnation.inquirer.net/category/events/feed'),

        # Business
        ('Business', 'http://business.inquirer.net/feed'),
        ('Business - Latest Stories', 'http://business.inquirer.net/category/latest-stories/feed'),
        ('Business - Money', 'http://business.inquirer.net/category/money/feed'),
        ('Business - Science & Health', 'http://business.inquirer.net/category/science-and-health/feed'),
        ('Business - Motoring', 'http://business.inquirer.net/category/motoring/feed'),
        ('Business - Property Guide', 'http://business.inquirer.net/category/property-guide/feed'),
        ('Business - Columnists', 'http://business.inquirer.net/category/columnists/feed'),

        # Sports
        ('Sports', 'http://sports.inquirer.net/feed'),
        ('Sports - Latest Stories', 'http://sports.inquirer.net/category/latest-stories/feed'),
        ('Sports - Basketball', 'http://sports.inquirer.net/category/section/basketball/feed'),
        ('Sports - Boxing & MMA', 'http://sports.inquirer.net/category/section/boxing-mma/feed'),
        ('Sports - Golf', 'http://sports.inquirer.net/category/section/golf/feed'),
        ('Sports - Football', 'http://sports.inquirer.net/category/section/other-sports/football/feed'),
        ('Sports - Other Sports', 'http://sports.inquirer.net/category/section/other-sports/feed'),

        # Technology
        ('Technology', 'http://technology.inquirer.net/feed'),
        ('Technology Latest Stories', 'http://technology.inquirer.net/category/latest-stories/feed'),

        # Entertainment
        ('Entertainment', 'http://entertainment.inquirer.net/feed'),
        ('Entertainment - Headlines', 'http://entertainment.inquirer.net/category/headlines/feed'),
        ('Entertainment - Latest Stories', 'http://entertainment.inquirer.net/category/latest-stories/feed'),
        ('Entertainment - Movies', 'http://movies.inquirer.net/feed'),

        # Lifestyle
        ('Lifestyle', 'http://lifestyle.inquirer.net/feed'),
        ('Lifestyle - Latest Stories', 'http://lifestyle.inquirer.net/category/latest-stories/feed'),
        ('Lifestyle - Arts & Books', 'http://lifestyle.inquirer.net/category/arts-and-books/feed'),
        ('Lifestyle - Wellness', 'http://lifestyle.inquirer.net/category/wellness/feed'),
        ('Lifestyle - Home & Entertaining', 'http://lifestyle.inquirer.net/category/home-and-entertaining/feed'),
        ('Lifestyle - Parenting', 'http://lifestyle.inquirer.net/category/parenting/feed'),
        ('Lifestyle - Food', 'http://lifestyle.inquirer.net/category/food/feed'),
        ('Lifestyle - Fashion & Beauty', 'http://lifestyle.inquirer.net/category/fashion-and-beauty/feed'),
        ('Lifestyle - Super', 'http://lifestyle.inquirer.net/category/super/feed'),
        ('Lifestyle - 2BU', 'http://lifestyle.inquirer.net/category/2bu/feed'),
        ('Lifestyle - Sunday Lifestyle', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/feed'),
        ('Lifestyle - Wedding', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/wedding/feed'),
        ('Lifestyle - Travel', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/travel/feed'),
        ('Lifestyle - Relationship', 'http://lifestyle.inquirer.net/category/sunday-lifestyle/relationship/feed'),

        # Opinion
        ('Opinion', 'http://opinion.inquirer.net/feed'),
        ('Opinion - Viewpoints', 'http://opinion.inquirer.net/category/viewpoints/feed'),
        ('Opinion - Talk of the Town', 'http://opinion.inquirer.net/category/inquirer-opinion/talk-of-the-town/feed'),
        ('Editorial', 'http://opinion.inquirer.net/category/editorial/feed'),
        ('Letters to the Editor', 'http://opinion.inquirer.net/category/letters-to-the-editor/feed'),
        ('Columns', 'http://opinion.inquirer.net/category/columns/feed'),

        # Citizens journalism and Cebu
        ('Citizens Journalism', 'http://newsinfo.inquirer.net/category/citizens-journalism/feed'),
        ('Cebu - Daily News', 'http://newsinfo.inquirer.net/category/cdn/feed'),
        ('Cebu - More News', 'http://newsinfo.inquirer.net/category/cdn/cdn-news/feed'),
        ('Cebu - Community', 'http://newsinfo.inquirer.net/category/cdn/cdn-community/feed'),
        ('Cebu - Metro', 'http://newsinfo.inquirer.net/category/cdn/cdn-metro/feed'),
        ('Cebu - Business', 'http://newsinfo.inquirer.net/category/cdn/cdn-enterprise/feed'),
        ('Cebu - Sports', 'http://newsinfo.inquirer.net/category/cdn/cdn-sports/feed'),
        ('Cebu - Visayas', 'http://newsinfo.inquirer.net/category/cdn/cdn-visayas/feed'),
        ('Cebu - Opinion', 'http://newsinfo.inquirer.net/category/cdn/cdn-opinion/feed'),
    ]

View File

@ -0,0 +1,97 @@
'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class PhilippineStar(BasicNewsRecipe):
    """Calibre recipe for The Philippine Star, fetched through the philstar.com RSS feeds.

    Articles are downloaded in their printer-friendly form (see
    ``print_version``) and each article title is read back from that page
    (see ``populate_article_metadata``).
    """

    # --- Publication identity and e-book metadata -------------------------
    title = 'The Philippine Star'
    # Evaluated once, when the class body is executed.
    custom_title = "The Philippine Star - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'The Philippine Star is a daily English-language broadsheet newspaper based in Manila. It has the most subscribers of any newspaper in the Philippines - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'
    language = 'en_PH'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    cover_url = 'http://www.philstar.com/images/philstar-logo-white.jpg'
    masthead_url = 'http://www.philstar.com/images/philstar-logo-white.jpg'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 1  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 20
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # --- Page elements stripped from every article ------------------------
    remove_tags = [
        dict(name='img', attrs={'id': 'Image1'}),  # Logo
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),  # Section (Headlines, Nation, Metro, ...)
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),  # Comments
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),  # View Comments
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),  # Zoom
    ]

    # Built from the attributes above, so it must stay below them.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # --- Feeds: (section title, RSS URL) ----------------------------------
    feeds = [
        ('Headlines', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=63'),
        ('Breaking News', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=200'),
        ('News Feature', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=68'),
        ('Nation', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=67'),
        ('Metro', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=65'),
        ('Business', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=66'),
        ('Sports', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=69'),
        ('Entertainment', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=70'),
        ('Science & Technology', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=75'),
        ('Networks', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=71'),
        ('Business as Usual', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=78'),
        ('Banking', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=74'),
        ('Motoring', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=72'),
        ('Real Estate', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=76'),
        ('Telecoms', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=73'),
        ('Agriculture', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=77'),
        ('Arts & Culture', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=79'),
        ('Food & Leisure', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=81'),
        ('Health & Family', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=80'),
        ('Education & Home', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=442'),
        ('Travel & Tourism', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=87'),
        ('Newsmakers', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=88'),
        ('Business Life', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=82'),
        ('Fashion & Beauty', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=83'),
        ('For Men', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=446'),
        ('Gadgets', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=449'),
        ('Sunday Life', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=86'),
        ('Supreme', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=448'),
        ('Opinion', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=64'),
        ('Letters to the Editor', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=135'),
        ('Starweek Magazine', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=90'),
        ('Modern Living', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=85'),
        ('YStyle', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=451'),
        ('Allure', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=89'),
        ('Weather', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=116'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at *url*.

        Every ``/Article`` substring in *url* is replaced with
        ``/ArticlePrinterFriendly``.
        """
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        """Set ``article.title`` from the header span of the downloaded page.

        The title is read from the printer-friendly page so that it is not
        altered by add_toc_thumbnail when the article has an image.

        :param article: object whose ``title`` attribute is assigned.
        :param soup: parsed article page, searched with ``soup.find``.
        :param first: unused here.

        If the header span is absent or has no children, the title already
        on *article* is left untouched instead of raising.
        """
        header = soup.find('span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        # soup.find yields None when nothing matches; an empty span has no
        # contents[0]. In both cases keep the title the article already has.
        if header is None or not header.contents:
            return
        article.title = header.contents[0].strip()

View File

@ -133,6 +133,7 @@ _extra_lang_codes = {
'en_TR' : _('English (Turkey)'),
'en_CY' : _('English (Cyprus)'),
'en_CZ' : _('English (Czech Republic)'),
'en_PH' : _('English (Philippines)'),
'en_PK' : _('English (Pakistan)'),
'en_HR' : _('English (Croatia)'),
'en_HK' : _('English (Hong Kong)'),