mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
...
This commit is contained in:
parent
016ec7ade3
commit
872a1434c7
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
time.com
|
time.com
|
||||||
'''
|
'''
|
||||||
@ -11,28 +12,23 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
class Time(BasicNewsRecipe):
|
class Time(BasicNewsRecipe):
|
||||||
#recipe_disabled = ('This recipe has been disabled as TIME no longer'
|
|
||||||
# ' publish complete articles on the web.')
|
|
||||||
title = u'Time'
|
title = u'Time'
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal, Rick Shang'
|
||||||
description = ('Weekly US magazine.')
|
description = ('Weekly US magazine.')
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
language = 'en'
|
language = 'en'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
#needs_subscription = 'optional'
|
needs_subscription = 'optional'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
{
|
{
|
||||||
'class':['artHd', 'articleContent',
|
'class':['tout1', 'entry-content', 'external-gallery-img', 'image-meta']
|
||||||
'entry-title','entry-meta', 'entry-content', 'thumbnail']
|
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
{'class':['content-tools', 'quigo', 'see',
|
{'class':['thumbnail', 'button']},
|
||||||
'first-tier-social-tools', 'navigation', 'enlarge lightbox']},
|
|
||||||
{'id':['share-tools']},
|
|
||||||
{'rel':'lightbox'},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
recursions = 10
|
recursions = 10
|
||||||
@ -43,17 +39,22 @@ class Time(BasicNewsRecipe):
|
|||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser(self)
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
if False and self.username and self.password:
|
# This site uses javascript in its login process
|
||||||
# This site uses javascript in its login process
|
if False and self.username is not None and self.password is not None:
|
||||||
res = br.open('http://www.time.com/time/magazine')
|
br.open('http://www.time.com/time/magazine')
|
||||||
br.select_form(nr=1)
|
br.select_form(predicate=lambda f: 'action' in f.attrs and f.attrs['action'] == 'https://auth.time.com/login.php')
|
||||||
br['username'] = self.username
|
br['username'] = self.username
|
||||||
br['password'] = self.password
|
br['password'] = self.password
|
||||||
res = br.submit()
|
br['magcode'] = ['TD']
|
||||||
raw = res.read()
|
br.find_control('turl').readonly = False
|
||||||
|
br['turl'] = 'http://www.time.com/time/magazine'
|
||||||
|
br.find_control('rurl').readonly = False
|
||||||
|
br['rurl'] = 'http://www.time.com/time/magazine'
|
||||||
|
br['remember'] = False
|
||||||
|
raw = br.submit().read()
|
||||||
if '>Log Out<' not in raw:
|
if '>Log Out<' not in raw:
|
||||||
raise ValueError('Failed to login to time.com, check'
|
raise ValueError('Failed to login to time.com, check'
|
||||||
' your username and password')
|
' your username and password')
|
||||||
return br
|
return br
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
@ -70,6 +71,9 @@ class Time(BasicNewsRecipe):
|
|||||||
except:
|
except:
|
||||||
self.log.exception('Failed to fetch cover')
|
self.log.exception('Failed to fetch cover')
|
||||||
|
|
||||||
|
dates = ''.join(root.xpath('//time[@class="updated"]/text()'))
|
||||||
|
if dates:
|
||||||
|
self.timefmt = ' [%s]'%dates
|
||||||
|
|
||||||
feeds = []
|
feeds = []
|
||||||
parent = root.xpath('//div[@class="content-main-aside"]')[0]
|
parent = root.xpath('//div[@class="content-main-aside"]')[0]
|
||||||
@ -96,7 +100,9 @@ class Time(BasicNewsRecipe):
|
|||||||
title = html.tostring(a[0], encoding=unicode,
|
title = html.tostring(a[0], encoding=unicode,
|
||||||
method='text').strip()
|
method='text').strip()
|
||||||
if not title: continue
|
if not title: continue
|
||||||
url = a[0].get('href')
|
url = a[0].get('href')
|
||||||
|
# url = re.sub('/magazine/article/0,9171','/subscriber/printout/0,8816',
|
||||||
|
# url)
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'http://www.time.com'+url
|
url = 'http://www.time.com'+url
|
||||||
desc = ''
|
desc = ''
|
||||||
@ -111,9 +117,3 @@ class Time(BasicNewsRecipe):
|
|||||||
'date' : '',
|
'date' : '',
|
||||||
'description' : desc
|
'description' : desc
|
||||||
}
|
}
|
||||||
|
|
||||||
def postprocess_html(self,soup,first):
|
|
||||||
for tag in soup.findAll(attrs ={'class':['artPag','pagination']}):
|
|
||||||
tag.extract()
|
|
||||||
return soup
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user