calibre/recipes/sunday_times_magazine.recipe
a10kiloham 8761f0cb60
Update login mechanism for Times Online
Fixes #1025 (Update login mechanism)
Fixes #1026 (Fix login mechanism)
2019-08-06 20:46:58 +05:30

152 lines
5.3 KiB
Plaintext

__license__ = 'GPL v3'
__copyright__ = '2010-2019'
'''
www.thetimes.co.uk/magazine/the-sunday-times-magazine/
'''
from mechanize import Request
from calibre import random_user_agent
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build a tag-matching spec that selects elements by CSS class.

    ``classes`` is a whitespace-separated string of class names. The
    returned dict has the form ``{'attrs': {'class': matcher}}`` where
    ``matcher`` receives an element's ``class`` attribute value and returns
    a truthy value when the element carries at least one of the requested
    class names.
    """
    # split() without an argument tolerates tabs and repeated spaces, which
    # split(' ') would turn into tokens that can never match.
    q = frozenset(classes.split())
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})
class TimesOnline(BasicNewsRecipe):
    """Recipe for The Sunday Times Magazine (UK) from www.thetimes.co.uk.

    The recipe is marked as requiring a subscription; ``get_browser`` posts
    ``self.username`` / ``self.password`` to the ``LOGIN`` URL before any
    content is fetched. The article index is built by ``parse_index`` from
    the section pages listed in ``feeds``.
    """

    title = 'The Sunday Times Magazine UK'
    __author__ = 'Bobby Steel & Darko Miletic'
    description = 'Newsmagazine from United Kingdom and World'
    language = 'en_GB'
    publisher = 'Times Newspapers Ltd'
    category = 'news, politics, UK'
    oldest_article = 3
    max_articles_per_feed = 500
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    delay = 1
    needs_subscription = True
    publication_type = 'newspaper'
    # Site root; also used as the prefix for article links in parse_index()
    # and as the post-login redirect target in get_browser().
    INDEX = 'https://www.thetimes.co.uk'
    # Endpoint that receives the login request.
    LOGIN = 'https://login.thetimes.co.uk/'
    # Not referenced inside this class; kept for backward compatibility.
    PREFIX = u'https://www.thetimes.co.uk'
    extra_css = """
.author-name,.authorName{font-style: italic}
.published-date,.multi-position-photo-text{
font-family: Arial,Helvetica,sans-serif;
font-size: small; color: gray;
display:block; margin-bottom: 0.5em}
body{font-family: Georgia,"Times New Roman",Times,serif}
"""
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language}

    def get_browser(self, *a, **kw):
        """Return a browser that has submitted the login request.

        Positional and keyword arguments are forwarded to
        ``BasicNewsRecipe.get_browser``; ``user_agent`` is always overridden
        with a random (non-IE) user agent.
        """
        start_url = self.INDEX
        kw['user_agent'] = random_user_agent(allow_ie=False)
        br = BasicNewsRecipe.get_browser(self, *a, **kw)
        self.log('Starting login process...')
        # Visit the site root first; the URL it resolves to is only logged.
        res = br.open(start_url)
        sso_url = res.geturl()
        self.log(sso_url)
        request_query = {
            'username': self.username,
            'password': self.password,
            's': 1,
            'gotoUrl': self.INDEX,
        }
        # NOTE(review): the form fields are handed to mechanize as a dict;
        # this relies on mechanize encoding dict data itself -- confirm
        # against the mechanize version in use.
        rq = Request(self.LOGIN, headers={
            'Accept': 'text/html',
            'Accept-Language': 'en-US,en;q=0.8',
            'X-HTTP-Method-Override': 'POST',
            'X-Requested-With': 'XMLHttpRequest',
        }, data=request_query)
        self.log('Sending login request...')
        # The response body is not inspected; only the browser state matters.
        br.open(rq)
        return br

    def get_cover_url(self):
        """Return the cover image URL for the current Sunday edition.

        The URL embeds the edition date as ``YYYYMMDD``. The image is probed
        once; if the request fails, ``None`` is returned instead of the URL.
        """
        from datetime import date
        from datetime import timedelta
        today = date.today()
        today_index = today.weekday()  # Monday == 0 ... Sunday == 6
        if today_index == 5:
            # Saturday: new edition drops on Saturday AM, so use the
            # upcoming Sunday's date.
            today += timedelta(1)
        elif today_index < 5:
            # Monday-Friday: rewind to the most recent Sunday
            # (Monday -> 1 day back ... Friday -> 5 days back).
            today -= timedelta(today_index + 1)
        # Sunday: today's date is already the edition date.
        cover = (
            'https://cdn2-img.pressreader.com/pressdisplay/docserver/getimage.aspx?file=1174' +
            today.strftime('%Y%m%d') +
            '00000000001001&page=1&scale=100')
        self.log(cover)
        # Probe with a base-class browser (no login step is run for it).
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover)
        except Exception:
            # Best effort: any fetch failure just means no cover is used.
            self.log("\nCover unavailable")
            cover = None
        return cover

    remove_tags = [
        classes('Topics is-hidden Tooltip Toolbar Comments RelatedLinks'),
        {'name': ['object', 'link', 'iframe', 'base', 'meta', 'script']},
        {'attrs': {
            'class': [
                'tools comments-parent', 'u-hide', 'Tooltip',
                'Toolbar Toolbar--bottom', 'Comments Article-container',
                'ArticlePager', 'Media-caption', 'RelatedLinks']}},
        {'attrs': {
            'class': lambda x: x and 'Toolbar' in x}}]
    remove_attributes = ['lang']
    keep_only_tags = [
        dict(attrs={'id': 'article-main'}),
        dict(attrs={'class': 'f-author'}),
        dict(attrs={'id': 'bodycopy'})]
    feeds = [
        (u'The Sunday Times Magazine',
         u'http://www.thetimes.co.uk/magazine/the-sunday-times-magazine/'),
        (u'Sunday Times Style', u'http://www.thetimes.co.uk/magazine/style/')]

    def preprocess_html(self, soup):
        """Strip inline ``style`` attributes, then run ``adeify_images``."""
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)

    def parse_index(self):
        """Build the article index from the section pages in ``feeds``.

        Every ``<a href=...>`` whose direct parent is an ``<h2>`` or ``<h3>``
        and whose text is non-empty becomes an article entry.

        Returns a list of ``(feed_title, articles)`` tuples where each
        article is a dict with ``title``, ``date``, ``url`` and
        ``description`` keys.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(
                0,
                _('Fetching feed') + ' %s...' %
                (feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for atag in soup.findAll('a', href=True):
                if atag.parent.name not in ('h2', 'h3'):
                    continue
                title = self.tag_to_string(atag).strip()
                if not title:
                    continue
                href = atag['href']
                # Site-relative links need the site root; links that are
                # already absolute must not be prefixed a second time.
                if href.startswith(('http://', 'https://')):
                    url = href
                else:
                    url = self.INDEX + href
                articles.append({
                    'title': title,
                    'date': '',
                    'url': url,
                    'description': ''})
            totalfeeds.append((feedtitle, articles))
        return totalfeeds