calibre/recipes/the_freeman.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

74 lines
3.5 KiB
Plaintext

'''
www.philstar.com
'''
import time
from calibre.web.feeds.recipes import BasicNewsRecipe
class Freeman(BasicNewsRecipe):
    """Recipe that downloads The Freeman from the philstar.com RSS feeds.

    Articles are fetched through the site's printer-friendly pages (see
    ``print_version``) and each article title is re-read from the
    downloaded page (see ``populate_article_metadata``).
    """

    title = 'The Freeman'
    # Title handed to the conversion pipeline; stamped with the time at which
    # this class body is executed.
    custom_title = "The Freeman - " + time.strftime('%d %b %Y %I:%M %p')
    __author__ = 'jde'
    __date__ = '31 May 2012'
    __version__ = '1.0'
    description = 'The Freeman is a daily English-language newspaper published in Cebu, Philippines, by the Philippine Star. It was the first newspaper in Cebu, first published in May 1919. The motto of the newspaper is "The fair and fearless" - philstar.com is a Philippine news and entertainment portal for the Filipino global community. It is the online presence of the STAR Group of Publications, a leading publisher of newspapers and magazines in the Philippines.'  # noqa
    language = 'en_PH'
    publisher = 'The Philippine STAR'
    category = 'news, Philippines'
    tags = 'news, Philippines'
    # The same logo image is used for both the cover and the masthead.
    cover_url = 'http://www.philstar.com/images/logo_Freeman.jpg'
    masthead_url = 'http://www.philstar.com/images/logo_Freeman.jpg'
    oldest_article = 1.5  # days
    max_articles_per_feed = 25
    simultaneous_downloads = 10
    publication_type = 'newspaper'
    timefmt = ' [%a, %d %b %Y %I:%M %p]'
    no_stylesheets = True
    use_embedded_content = False
    encoding = None
    recursions = 0
    needs_subscription = False
    remove_javascript = True
    remove_empty_feeds = True
    auto_cleanup = False

    # Page elements stripped from every downloaded article. The trailing
    # labels are the original author's, listed in the same order as the
    # entries they were written for.
    remove_tags = [
        dict(name='img', attrs={'id': 'Image1'}),  # Logo
        dict(name='span', attrs={'id': 'ControlArticle1_LabelHeader'}),  # Section (Headlines, Nation, Metro, ...)
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_hlComments'}),  # Comments
        dict(name='img', attrs={'src': 'images/post-comments.jpg'}),  # View Comments
        dict(name='a', attrs={'id': 'ControlArticle1_FormView1_ControlPhotoAndCaption1_hlImageCaption'}),  # Zoom  # noqa
    ]

    # Metadata for the generated book, built from the attributes above.
    conversion_options = {
        'title': custom_title,
        'comments': description,
        'tags': tags,
        'language': language,
        'publisher': publisher,
        'authors': publisher,
        'smarten_punctuation': True,
    }

    # (section title, RSS feed URL) pairs.
    feeds = [
        ('Cebu News', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=107'),
        ('Freeman Opinion', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=109'),
        ('Metro Cebu', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=531'),
        ('Region', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=530'),
        ('Cebu Business', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=108'),
        ('Cebu Sports', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=110'),
        ('Cebu Lifestyle', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=111'),
        ('Cebu Entertainment', 'http://rss.philstar.com/Rss.aspx?publicationSubCategoryId=51'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at *url*.

        Every ``/Article`` substring of *url* is replaced with
        ``/ArticlePrinterFriendly``.
        """
        return url.replace('/Article', '/ArticlePrinterFriendly')

    def populate_article_metadata(self, article, soup, first):
        """Set ``article.title`` from the header span of the downloaded page.

        The title is taken from the printer-friendly page to avoid
        add_toc_thumbnail changing the title when the article has an image.

        If the header span is missing, has no children, or yields only
        whitespace, ``article.title`` is left untouched instead of raising,
        so a page with an unexpected layout keeps its existing title.
        """
        header = soup.find(
            'span', {'id': 'ControlArticle1_FormView1_ArticleHeaderLabel'})
        # find() returns None when the span is absent; contents is empty when
        # the span has no children. Either case used to raise here.
        if header is None or not header.contents:
            return
        heading = header.contents[0].strip()
        if heading:
            article.title = heading