mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update The Australian
This commit is contained in:
parent
8aaf03ef28
commit
2157b1dcf4
@ -7,11 +7,13 @@ __docformat__ = 'restructuredtext en'
|
||||
http://www.theaustralian.news.com.au/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre import browser
|
||||
from calibre.web.feeds.jsnews import JavascriptRecipe
|
||||
from calibre.web.feeds import feed_from_xml
|
||||
|
||||
class DailyTelegraph(BasicNewsRecipe):
|
||||
class DailyTelegraph(JavascriptRecipe):
|
||||
title = u'The Australian'
|
||||
__author__ = u'Matthew Briggs and Sujata Raman'
|
||||
__author__ = u'Kovid Goyal'
|
||||
description = (u'National broadsheet newspaper from down under - colloquially known as The Oz'
|
||||
'. You will need to have a subscription to '
|
||||
'http://www.theaustralian.com.au to get full articles.')
|
||||
@ -23,27 +25,16 @@ class DailyTelegraph(BasicNewsRecipe):
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
remove_empty_feeds = True
|
||||
ignore_duplicate_articles = {'url'}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id': 'story'})]
|
||||
|
||||
# remove_tags = [dict(name=['object','link'])]
|
||||
remove_tags = [dict(name='div', attrs={'class': 'story-info'}),
|
||||
dict(name='div', attrs={'class': 'story-header-tools'}),
|
||||
dict(name='div', attrs={'class': 'story-sidebar'}),
|
||||
dict(name='div', attrs={'class': 'story-footer'}),
|
||||
dict(name='div', attrs={'id': 'comments'}),
|
||||
dict(name='div', attrs={'class': 'story-extras story-extras-2'}),
|
||||
dict(name='div', attrs={'class': 'group item-count-1 story-related'})
|
||||
]
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; }
|
||||
#article{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
|
||||
.module-subheader{font-family :Tahoma,Geneva,Arial,Helvetica,sans-serif; color:#666666; font-size: xx-small;}
|
||||
.intro{ font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif;font-size: x-small; }
|
||||
.article-source{font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif; color:#666666; font-size: xx-small;}
|
||||
.caption{font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif; font-size: xx-small;}
|
||||
'''
|
||||
keep_only_tags = ['div#story']
|
||||
remove_tags = [
|
||||
'.story-info', '.story-header-tools', '.module-controls', '.story-sidebar',
|
||||
'.story-footer', '#comments', '.story-extras', '.story-related', '.vms-nav',
|
||||
'.vms-endcard', '.vms-discover', '.share-tools', '.story-comments-link',
|
||||
'.vms-controls', '.ooyala-player', '.vms-countdown', '.vms-header', '.comments',
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'News', u'http://feeds.news.com.au/public/rss/2.0/aus_news_807.xml'),
|
||||
@ -63,29 +54,34 @@ class DailyTelegraph(BasicNewsRecipe):
|
||||
(u'Business', u'http://feeds.news.com.au/public/rss/2.0/aus_business_811.xml'),
|
||||
(u'Aviation', u'http://feeds.news.com.au/public/rss/2.0/aus_business_aviation_706.xml'),
|
||||
(u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'),
|
||||
(u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml')]
|
||||
(u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml')
|
||||
]
|
||||
|
||||
def get_browser(self):
    """Return the recipe browser, logged in when credentials are configured.

    The browser comes from ``BasicNewsRecipe.get_browser``. When both
    ``self.username`` and ``self.password`` are set, the site front page is
    opened, the form selected with ``nr=1`` is filled in and submitted, and
    the response is checked for a "log out" link.

    Raises:
        ValueError: if the response to the login submission does not
            contain ``>log out`` (compared case-insensitively).
    """
    br = BasicNewsRecipe.get_browser(self)
    # Without a complete set of credentials the plain browser is returned.
    if not (self.username and self.password):
        return br

    br.open('http://www.theaustralian.com.au')
    br.select_form(nr=1)
    br['username'] = self.username
    br['password'] = self.password
    response_text = br.submit().read()

    # A "log out" link in the returned page is taken as proof of success.
    if '>log out' in response_text.lower():
        return br
    raise ValueError('Failed to log in to www.theaustralian.com.au'
                     ' are your username and password correct?')
|
||||
def get_publication_data(self, br):
    """Build the publication index from the RSS feeds in ``self.feeds``.

    Each ``(title, url)`` pair in ``self.feeds`` is downloaded and parsed
    with ``feed_from_xml``; feeds that end up with no articles are left out.

    Args:
        br: browser supplied by the caller. It is not used for the feed
            downloads; a separate browser created with ``browser()``
            fetches the raw feed XML instead.

    Returns:
        A dict with a single ``'index'`` key whose value is a list of
        ``(feed_title, articles)`` tuples, where ``articles`` is a list of
        dicts with ``'title'``, ``'url'`` and ``'description'`` keys.
    """
    feed_browser = browser()
    index = []
    for title, url in self.feeds:
        # Log before downloading so that a failing fetch can be traced
        # back to the feed that caused it.
        self.log('Fetching feed: %s' % title)
        raw = feed_browser.open_novisit(url).read()
        feed = feed_from_xml(
            raw, title=title, log=self.log,
            oldest_article=self.oldest_article,
            max_articles_per_feed=self.max_articles_per_feed,
            get_article_url=self.get_article_url)
        # Skip feeds that contain no articles after parsing.
        if len(feed) > 0:
            index.append((title, [
                {'title': a.title, 'url': a.url, 'description': a.text_summary}
                for a in feed.articles]))
    return {'index': index}
|
||||
|
||||
def do_login(self, browser, username, password):
    """Log in to theaustralian.com.au through the given browser.

    Nothing happens unless both ``username`` and ``password`` are truthy.
    Otherwise the login page is visited, the form posting to the
    news.com.au identity provider is filled in and submitted, and the
    resulting page is checked for a "Log Out" link.

    Raises:
        ValueError: if ``>Log Out`` is not found in ``browser.html`` after
            the form has been submitted.
    """
    # Guard: incomplete credentials mean no login attempt at all.
    if not (username and password):
        return

    browser.visit('http://www.theaustralian.com.au/login')
    login_form = browser.select_form('form[action="https://idp.news.com.au/idp/Authn/rest"]')
    login_form['username'] = username
    login_form['password'] = password
    browser.submit(submit_control_selector='button[type="submit"]', timeout=60)

    # The presence of a "Log Out" link is the success signal.
    if '>Log Out' in browser.html:
        return
    raise ValueError('Failed to log in, check your username and password')
|
||||
|
||||
def get_article_url(self, article):
    """Return the URL to use for *article*, taken from its ``id`` attribute."""
    article_id = article.id
    return article_id
|
||||
|
||||
# br = self.get_browser()
|
||||
# br.open(article.link).read()
|
||||
# print br.geturl()
|
||||
|
||||
# return br.geturl()
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user