mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Update LA Times
Fixes #1660894 [Fetching news from LA Times broken](https://bugs.launchpad.net/calibre/+bug/1660894)
This commit is contained in:
parent
36e9dcf2ad
commit
c4f196d6c3
@ -1,9 +1,10 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||||
from __future__ import (unicode_literals, division, absolute_import,
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
print_function)
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from pprint import pformat
|
from pprint import pformat
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
@ -26,7 +27,6 @@ class LATimes(BasicNewsRecipe):
|
|||||||
language = 'en'
|
language = 'en'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
masthead_url = 'http://www.latimes.com/images/logo.png'
|
|
||||||
cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
|
cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
@ -42,10 +42,18 @@ class LATimes(BasicNewsRecipe):
|
|||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup('http://www.latimes.com')
|
soup = self.index_to_soup('http://www.latimes.com')
|
||||||
feeds = defaultdict(list)
|
feeds = defaultdict(list)
|
||||||
for x in soup.findAll(attrs={'data-content-type': 'story', 'data-content-section': True, 'data-content-url': True, 'data-content-title': True}):
|
for x in soup.findAll(
|
||||||
url = absurl(x['data-content-url'])
|
attrs={
|
||||||
|
'data-content-type': 'story',
|
||||||
|
'data-content-section': True,
|
||||||
|
'data-content-slug': True,
|
||||||
|
}
|
||||||
|
):
|
||||||
|
a = x.find('a', attrs={'class': lambda x: not x or 'SectionHeading' not in x})
|
||||||
|
if a is not None:
|
||||||
|
url = absurl(a['href'])
|
||||||
section = x['data-content-section'].capitalize()
|
section = x['data-content-section'].capitalize()
|
||||||
title = x['data-content-title']
|
title = self.tag_to_string(a)
|
||||||
feeds[section].append({'title': title, 'url': url})
|
feeds[section].append({'title': title, 'url': url})
|
||||||
self.log(pformat(dict(feeds)))
|
self.log(pformat(dict(feeds)))
|
||||||
return [(k, feeds[k]) for k in sorted(feeds)]
|
return [(k, feeds[k]) for k in sorted(feeds)]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user