mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
90 lines
4.7 KiB
Python
90 lines
4.7 KiB
Python
#!/usr/bin/env python
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2009, Matthew Briggs'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
'''
|
|
http://www.theaustralian.news.com.au/
|
|
'''
|
|
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
class DailyTelegraph(BasicNewsRecipe):
    '''
    Recipe that downloads "The Australian" from its news.com.au RSS feeds.

    Logging in is optional: when both a username and a password are set,
    :meth:`get_browser` submits them to www.theaustralian.com.au before any
    article is fetched; otherwise the plain browser is returned as is.

    NOTE: the class name does not match the publication named in ``title``.
    It is left unchanged so that anything referring to the class by name
    keeps working.
    '''

    title = u'The Australian'
    __author__ = u'Matthew Briggs and Sujata Raman'
    description = (u'National broadsheet newspaper from down under - colloquially known as The Oz'
                   '. You will need to have a subscription to '
                   'http://www.theaustralian.com.au to get full articles.')
    language = 'en_AU'

    oldest_article = 2
    # Credentials are only used when supplied (see get_browser).
    needs_subscription = 'optional'
    max_articles_per_feed = 30
    remove_javascript = True
    no_stylesheets = True
    encoding = 'utf8'

    # Only the story container is kept from each downloaded page.
    keep_only_tags = [dict(name='div', attrs={'id': 'story'})]

    # Blocks inside the story container that are stripped from the output.
    remove_tags = [
        dict(name='div', attrs={'class': 'story-info'}),
        dict(name='div', attrs={'class': 'story-header-tools'}),
        dict(name='div', attrs={'class': 'story-sidebar'}),
        dict(name='div', attrs={'class': 'story-footer'}),
        dict(name='div', attrs={'id': 'comments'}),
        dict(name='div', attrs={'class': 'story-extras story-extras-2'}),
        dict(name='div', attrs={'class': 'group item-count-1 story-related'}),
    ]

    extra_css = '''
                h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; }
                #article{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
                .module-subheader{font-family :Tahoma,Geneva,Arial,Helvetica,sans-serif; color:#666666; font-size: xx-small;}
                .intro{ font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif;font-size: x-small; }
                .article-source{font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif; color:#666666; font-size: xx-small;}
                .caption{font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif; font-size: xx-small;}
                '''

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'News', u'http://feeds.news.com.au/public/rss/2.0/aus_news_807.xml'),
        (u'Opinion', u'http://feeds.news.com.au/public/rss/2.0/aus_opinion_58.xml'),
        (u'The Nation', u'http://feeds.news.com.au/public/rss/2.0/aus_the_nation_62.xml'),
        (u'World News', u'http://feeds.news.com.au/public/rss/2.0/aus_world_808.xml'),
        (u'US Election', u'http://feeds.news.com.au/public/rss/2.0/aus_uselection_687.xml'),
        (u'Climate', u'http://feeds.news.com.au/public/rss/2.0/aus_climate_809.xml'),
        (u'Media', u'http://feeds.news.com.au/public/rss/2.0/aus_media_57.xml'),
        (u'IT', u'http://feeds.news.com.au/public/rss/2.0/ausit_itnews_topstories_367.xml'),
        (u'Exec Tech', u'http://feeds.news.com.au/public/rss/2.0/ausit_exec_topstories_385.xml'),
        (u'Higher Education', u'http://feeds.news.com.au/public/rss/2.0/aus_higher_education_56.xml'),
        (u'Arts', u'http://feeds.news.com.au/public/rss/2.0/aus_arts_51.xml'),
        (u'Travel', u'http://feeds.news.com.au/public/rss/2.0/aus_travel_and_indulgence_63.xml'),
        (u'Property', u'http://feeds.news.com.au/public/rss/2.0/aus_property_59.xml'),
        (u'Sport', u'http://feeds.news.com.au/public/rss/2.0/aus_sport_61.xml'),
        (u'Business', u'http://feeds.news.com.au/public/rss/2.0/aus_business_811.xml'),
        (u'Aviation', u'http://feeds.news.com.au/public/rss/2.0/aus_business_aviation_706.xml'),
        (u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'),
        (u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml'),
    ]

    def get_browser(self):
        '''
        Return the download browser, logged in when credentials are set.

        :return: the browser obtained from ``BasicNewsRecipe.get_browser``
        :raises ValueError: if a username and password were supplied but the
            page returned after submitting the login form does not contain
            the ``>log out`` marker
        '''
        br = BasicNewsRecipe.get_browser(self)
        if self.username and self.password:
            br.open('http://www.theaustralian.com.au')
            # NOTE(review): assumes the login form is the first form on the
            # home page -- confirm against the live site.
            br.select_form(nr=0)
            br['username'] = self.username
            br['password'] = self.password
            raw = br.submit().read()
            # On Python 3 the response body is bytes; testing a str marker
            # against bytes raises TypeError, so decode before comparing.
            if isinstance(raw, bytes):
                raw = raw.decode('utf-8', 'replace')
            if '>log out' not in raw.lower():
                raise ValueError('Failed to log in to www.theaustralian.com.au'
                                 ' are your username and password correct?')
        return br

    def get_article_url(self, article):
        '''
        Return the URL to download for a feed entry.

        :param article: parsed feed entry; only its ``id`` attribute is read
        :return: the value of ``article.id``
        '''
        # An earlier, now removed, approach opened ``article.link`` and used
        # the browser's final URL; the entry ``id`` is returned directly
        # instead, which avoids an extra request per article.
        return article.id