calibre/resources/recipes/the_oz.recipe
2009-10-22 17:21:35 -06:00

77 lines
3.9 KiB
Python

#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Matthew Briggs'
__docformat__ = 'restructuredtext en'
'''
http://www.theaustralian.news.com.au/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class DailyTelegraph(BasicNewsRecipe):
    """Calibre news recipe that downloads The Australian from its RSS feeds.

    NOTE(review): the class is named ``DailyTelegraph`` although every
    attribute below targets The Australian; the name is kept unchanged so
    anything that refers to the class by name keeps working.
    """

    # --- Publication metadata -------------------------------------------
    title = u'The Australian'
    __author__ = u'Matthew Briggs and Sujata Raman'
    description = u'National broadsheet newspaper from down under - colloquially known as The Oz'
    language = 'en_AU'

    # --- Download limits and page clean-up ------------------------------
    oldest_article = 2
    max_articles_per_feed = 10
    remove_javascript = True
    no_stylesheets = True
    encoding = 'utf8'

    # Conversion options; they reuse the description and title defined above.
    html2lrf_options = [
        '--comment', description,
        '--category', 'news, Australia',
        '--publisher', title,
    ]

    # Only the section heading and the article container are kept.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'section-heading'}),
        dict(name='div', attrs={'id': 'article'}),
    ]

    # <object> and <link> elements are stripped from the kept content.
    remove_tags = [dict(name=['object', 'link'])]

    # Styling applied to the downloaded articles.
    extra_css = '''
        h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; }
        #article{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
        .module-subheader{font-family :Tahoma,Geneva,Arial,Helvetica,sans-serif; color:#666666; font-size: xx-small;}
        .intro{ font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif;font-size: x-small; }
        .article-source{font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif; color:#666666; font-size: xx-small;}
        .caption{font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif; font-size: xx-small;}
        '''

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'News', u'http://feeds.news.com.au/public/rss/2.0/aus_news_807.xml'),
        (u'World News', u'http://feeds.news.com.au/public/rss/2.0/aus_world_808.xml'),
        (u'Opinion', u'http://feeds.news.com.au/public/rss/2.0/aus_opinion_58.xml'),
        (u'Business', u'http://feeds.news.com.au/public/rss/2.0/aus_business_811.xml'),
        (u'Media', u'http://feeds.news.com.au/public/rss/2.0/aus_media_57.xml'),
        (u'Higher Education', u'http://feeds.news.com.au/public/rss/2.0/aus_higher_education_56.xml'),
        (u'The Arts', u'http://feeds.news.com.au/public/rss/2.0/aus_arts_51.xml'),
        (u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'),
        (u'The Nation', u'http://feeds.news.com.au/public/rss/2.0/aus_the_nation_62.xml'),
        (u'Sport', u'http://feeds.news.com.au/public/rss/2.0/aus_sport_61.xml'),
        (u'Travel', u'http://feeds.news.com.au/public/rss/2.0/aus_travel_and_indulgence_63.xml'),
        (u'Defence', u'http://feeds.news.com.au/public/rss/2.0/aus_defence_54.xml'),
        (u'Aviation', u'http://feeds.news.com.au/public/rss/2.0/aus_business_aviation_706.xml'),
        (u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml'),
        (u'Climate', u'http://feeds.news.com.au/public/rss/2.0/aus_climate_809.xml'),
        (u'Property', u'http://feeds.news.com.au/public/rss/2.0/aus_property_59.xml'),
        (u'US Election', u'http://feeds.news.com.au/public/rss/2.0/aus_uselection_687.xml'),
    ]

    def get_cover_url(self):
        """Return the cover image URL scraped from the paper's home page.

        The home page is fetched and searched for the ``<img>`` whose ``alt``
        text is "Digital editions of The Australian"; that image's ``src``
        attribute is returned.

        Returns ``None`` when the image (or its ``src``) is not present, so
        a change in the site's markup results in "no cover" rather than an
        ``UnboundLocalError`` or ``KeyError`` aborting the download.
        """
        href = 'http://www.theaustralian.news.com.au/'
        soup = self.index_to_soup(href)
        img = soup.find('img', alt="Digital editions of The Australian")
        if img is None:
            return None
        return img.get('src')