Mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-07-09 03:04:10 -04:00
Forgot to update atlantic_com recipe
This commit is contained in:
parent f6929462a4
commit e21590ac17
@@ -1,11 +1,10 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
 from __future__ import unicode_literals
 import json
 from xml.sax.saxutils import escape, quoteattr
 
-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes as prefix_classes, classes
 
 web_version = True
 test_article = None
@@ -67,26 +66,6 @@ def extract_html(soup):
 # }}}
 
 
-def classes(classes):
-    q = frozenset(classes.split(' '))
-    return dict(
-        attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}
-    )
-
-
-def prefix_classes(classes):
-    q = classes.split()
-
-    def test(x):
-        if x:
-            for cls in x.split():
-                for c in q:
-                    if cls.startswith(c):
-                        return True
-        return False
-    return dict(attrs={'class': test})
-
-
 class TheAtlantic(BasicNewsRecipe):
 
     if web_version:
@@ -214,55 +193,25 @@ class TheAtlantic(BasicNewsRecipe):
         if test_article:
             return [('Articles', [{'title': 'Test article', 'url': test_article}])]
         soup = self.index_to_soup(self.INDEX)
-        figure = soup.find('figure', id='cover-image')
-        if figure is not None:
-            img = figure.find('img', src=True)
-            if img:
+        img = soup.find(**prefix_classes('IssueDescription_cover__'))
+        if img is not None:
             self.cover_url = img['src']
         current_section, current_articles = 'Cover Story', []
         feeds = []
-        for div in soup.findAll('div', attrs={'class': lambda x: x and set(x.split()).intersection({'top-sections', 'bottom-sections'})}):
-            for h2 in div.findAll('h2', attrs={'class': True}):
-                cls = h2['class']
-                if hasattr(cls, 'split'):
-                    cls = cls.split()
-                if 'section-name' in cls:
+        for x in soup.findAll(**prefix_classes('TocFeaturedSection_heading__ TocSection_heading__ TocHeroGridItem_hedLink___ TocGridItem_hedLink__')):
+            cls = x['class']
+            if not isinstance(cls, str):
+                cls = ' '.join(cls)
+            title = self.tag_to_string(x).strip()
+            if 'Section' in cls:
                 if current_articles:
                     feeds.append((current_section, current_articles))
-                    current_articles = []
-                    current_section = self.tag_to_string(h2)
-                    self.log('\nFound section:', current_section)
-                elif 'hed' in cls:
-                    title = self.tag_to_string(h2)
-                    a = h2.findParent('a', href=True)
-                    if a is None:
+                current_section, current_articles = title, []
+                self.log(current_section)
                 continue
-                    url = a['href']
-                    if url.startswith('/'):
-                        url = 'https://www.theatlantic.com' + url
-                    li = a.findParent(
-                        'li',
-                        attrs={'class': lambda x: x and 'article' in x.split()}
-                    )
-                    desc = ''
-                    dek = li.find(
-                        attrs={'class': lambda x: x and 'dek' in x.split()}
-                    )
-                    if dek is not None:
-                        desc += self.tag_to_string(dek)
-                    byline = li.find(
-                        attrs={'class': lambda x: x and 'byline' in x.split()}
-                    )
-                    if byline is not None:
-                        desc += ' -- ' + self.tag_to_string(byline)
-                    self.log('\t', title, 'at', url)
-                    if desc:
-                        self.log('\t\t', desc)
-                    current_articles.append({
-                        'title': title,
-                        'url': url,
-                        'description': desc
-                    })
+            url = x['href']
+            current_articles.append({'title': title, 'url': url})
+            self.log('\t', title, url)
         if current_articles:
             feeds.append((current_section, current_articles))
         return feeds
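For context on the import swap above: classes and prefixed_classes (imported here under the alias prefix_classes) are the shared helpers in calibre.web.feeds.news that replace the local copies deleted from this recipe. Both build attribute matchers that can be splatted into BeautifulSoup-style findAll() calls, as in soup.findAll(**prefix_classes('...')); the prefixed variant matches class names by prefix, which appears to be what lets the recipe cope with the hashed CSS-module style class names (IssueDescription_cover__..., TocSection_heading__...) on the redesigned site. The following is a minimal standalone sketch modelled on the removed local prefix_classes() helper; the hashed suffixes in the sample strings are made up for illustration and not taken from the site.

# Minimal sketch of the prefix-based class matcher, modelled on the local
# prefix_classes() helper removed in this commit.

def prefix_classes(classes):
    prefixes = classes.split()

    def test(x):
        # x is a tag's class attribute as one space-separated string
        if x:
            for cls in x.split():
                for prefix in prefixes:
                    if cls.startswith(prefix):
                        return True
        return False
    # Usable as soup.findAll(**prefix_classes('...')) inside a recipe
    return dict(attrs={'class': test})


if __name__ == '__main__':
    matcher = prefix_classes('IssueDescription_cover__ TocSection_heading__')
    predicate = matcher['attrs']['class']
    print(predicate('IssueDescription_cover__XJ2ak'))   # True: listed prefix matches
    print(predicate('TocGridItem_hedLink__9fQ3z'))      # False: prefix not listed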