mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Forgot to update atlantic_com recipe
This commit is contained in:
parent
f6929462a4
commit
e21590ac17
@ -1,11 +1,10 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim:fileencoding=utf-8
|
# vim:fileencoding=utf-8
|
||||||
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
from __future__ import unicode_literals
|
|
||||||
import json
|
import json
|
||||||
from xml.sax.saxutils import escape, quoteattr
|
from xml.sax.saxutils import escape, quoteattr
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe, prefixed_classes as prefix_classes, classes
|
||||||
|
|
||||||
web_version = True
|
web_version = True
|
||||||
test_article = None
|
test_article = None
|
||||||
@ -67,26 +66,6 @@ def extract_html(soup):
|
|||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
def classes(classes):
    """Build BeautifulSoup keyword arguments matching tags by CSS class.

    ``classes`` is a whitespace-separated string of class names. The
    returned dict is meant to be unpacked into ``find()``/``findAll()``;
    its ``class`` predicate is true when the tag's class attribute
    contains at least one of the requested names.
    """
    # split() with no argument tolerates repeated or leading/trailing
    # whitespace, matching how prefix_classes() tokenises its argument.
    wanted = frozenset(classes.split())

    def matches(value):
        # value is None or '' when the tag carries no class attribute.
        return bool(value) and not wanted.isdisjoint(value.split())

    return dict(attrs={'class': matches})
|
|
||||||
|
|
||||||
|
|
||||||
def prefix_classes(classes):
    """Build BeautifulSoup keyword arguments matching tags by class prefix.

    ``classes`` is a whitespace-separated string of prefixes. The returned
    dict is meant to be unpacked into ``find()``/``findAll()``; its
    ``class`` predicate is true when any class name on the tag starts with
    any of the given prefixes.
    """
    # str.startswith accepts a tuple, so one call checks every prefix.
    prefixes = tuple(classes.split())

    def test(x):
        # x is None or '' when the tag carries no class attribute.
        if not x:
            return False
        return any(cls.startswith(prefixes) for cls in x.split())

    return dict(attrs={'class': test})
|
|
||||||
|
|
||||||
|
|
||||||
class TheAtlantic(BasicNewsRecipe):
|
class TheAtlantic(BasicNewsRecipe):
|
||||||
|
|
||||||
if web_version:
|
if web_version:
|
||||||
@ -214,55 +193,25 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
if test_article:
|
if test_article:
|
||||||
return [('Articles', [{'title': 'Test article', 'url': test_article}])]
|
return [('Articles', [{'title': 'Test article', 'url': test_article}])]
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
figure = soup.find('figure', id='cover-image')
|
img = soup.find(**prefix_classes('IssueDescription_cover__'))
|
||||||
if figure is not None:
|
if img is not None:
|
||||||
img = figure.find('img', src=True)
|
self.cover_url = img['src']
|
||||||
if img:
|
|
||||||
self.cover_url = img['src']
|
|
||||||
current_section, current_articles = 'Cover Story', []
|
current_section, current_articles = 'Cover Story', []
|
||||||
feeds = []
|
feeds = []
|
||||||
for div in soup.findAll('div', attrs={'class': lambda x: x and set(x.split()).intersection({'top-sections', 'bottom-sections'})}):
|
for x in soup.findAll(**prefix_classes('TocFeaturedSection_heading__ TocSection_heading__ TocHeroGridItem_hedLink___ TocGridItem_hedLink__')):
|
||||||
for h2 in div.findAll('h2', attrs={'class': True}):
|
cls = x['class']
|
||||||
cls = h2['class']
|
if not isinstance(cls, str):
|
||||||
if hasattr(cls, 'split'):
|
cls = ' '.join(cls)
|
||||||
cls = cls.split()
|
title = self.tag_to_string(x).strip()
|
||||||
if 'section-name' in cls:
|
if 'Section' in cls:
|
||||||
if current_articles:
|
if current_articles:
|
||||||
feeds.append((current_section, current_articles))
|
feeds.append((current_section, current_articles))
|
||||||
current_articles = []
|
current_section, current_articles = title, []
|
||||||
current_section = self.tag_to_string(h2)
|
self.log(current_section)
|
||||||
self.log('\nFound section:', current_section)
|
continue
|
||||||
elif 'hed' in cls:
|
url = x['href']
|
||||||
title = self.tag_to_string(h2)
|
current_articles.append({'title': title, 'url': url})
|
||||||
a = h2.findParent('a', href=True)
|
self.log('\t', title, url)
|
||||||
if a is None:
|
|
||||||
continue
|
|
||||||
url = a['href']
|
|
||||||
if url.startswith('/'):
|
|
||||||
url = 'https://www.theatlantic.com' + url
|
|
||||||
li = a.findParent(
|
|
||||||
'li',
|
|
||||||
attrs={'class': lambda x: x and 'article' in x.split()}
|
|
||||||
)
|
|
||||||
desc = ''
|
|
||||||
dek = li.find(
|
|
||||||
attrs={'class': lambda x: x and 'dek' in x.split()}
|
|
||||||
)
|
|
||||||
if dek is not None:
|
|
||||||
desc += self.tag_to_string(dek)
|
|
||||||
byline = li.find(
|
|
||||||
attrs={'class': lambda x: x and 'byline' in x.split()}
|
|
||||||
)
|
|
||||||
if byline is not None:
|
|
||||||
desc += ' -- ' + self.tag_to_string(byline)
|
|
||||||
self.log('\t', title, 'at', url)
|
|
||||||
if desc:
|
|
||||||
self.log('\t\t', desc)
|
|
||||||
current_articles.append({
|
|
||||||
'title': title,
|
|
||||||
'url': url,
|
|
||||||
'description': desc
|
|
||||||
})
|
|
||||||
if current_articles:
|
if current_articles:
|
||||||
feeds.append((current_section, current_articles))
|
feeds.append((current_section, current_articles))
|
||||||
return feeds
|
return feeds
|
||||||
|
Loading…
x
Reference in New Issue
Block a user