Update LA Times

Fixes #1660894 [Fetching news from LA Times broken](https://bugs.launchpad.net/calibre/+bug/1660894)
This commit is contained in:
Kovid Goyal 2017-02-01 12:18:32 +05:30
parent 36e9dcf2ad
commit c4f196d6c3

View File

@@ -1,9 +1,10 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import, from __future__ import absolute_import, division, print_function, unicode_literals
print_function)
from collections import defaultdict from collections import defaultdict
from pprint import pformat from pprint import pformat
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
@@ -26,7 +27,6 @@ class LATimes(BasicNewsRecipe):
language = 'en' language = 'en'
remove_empty_feeds = True remove_empty_feeds = True
publication_type = 'newspaper' publication_type = 'newspaper'
masthead_url = 'http://www.latimes.com/images/logo.png'
cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf' cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
keep_only_tags = [ keep_only_tags = [
@@ -42,10 +42,18 @@ class LATimes(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
soup = self.index_to_soup('http://www.latimes.com') soup = self.index_to_soup('http://www.latimes.com')
feeds = defaultdict(list) feeds = defaultdict(list)
for x in soup.findAll(attrs={'data-content-type': 'story', 'data-content-section': True, 'data-content-url': True, 'data-content-title': True}): for x in soup.findAll(
url = absurl(x['data-content-url']) attrs={
'data-content-type': 'story',
'data-content-section': True,
'data-content-slug': True,
}
):
a = x.find('a', attrs={'class': lambda x: not x or 'SectionHeading' not in x})
if a is not None:
url = absurl(a['href'])
section = x['data-content-section'].capitalize() section = x['data-content-section'].capitalize()
title = x['data-content-title'] title = self.tag_to_string(a)
feeds[section].append({'title': title, 'url': url}) feeds[section].append({'title': title, 'url': url})
self.log(pformat(dict(feeds))) self.log(pformat(dict(feeds)))
return [(k, feeds[k]) for k in sorted(feeds)] return [(k, feeds[k]) for k in sorted(feeds)]