mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Aviation International News by Jose Ortiz
This commit is contained in:
parent
f66a709dfe
commit
7a048fe361
102
recipes/ainonline.recipe
Normal file
102
recipes/ainonline.recipe
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
# License: GPLv3 Copyright: 2019, Jose Ortiz <jlortiz84 at gmail.com>
|
||||||
|
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from pprint import pformat
|
||||||
|
|
||||||
|
|
||||||
|
INDEX = 'https://www.ainonline.com/'
|
||||||
|
|
||||||
|
|
||||||
|
def absurl(url):
|
||||||
|
if url.startswith('/'):
|
||||||
|
url = INDEX + url[1:]
|
||||||
|
return url
|
||||||
|
|
||||||
|
|
||||||
|
def classes(classes):
|
||||||
|
q = frozenset(classes.split(' '))
|
||||||
|
return dict(attrs={
|
||||||
|
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||||
|
|
||||||
|
|
||||||
|
class AINOnline(BasicNewsRecipe):
|
||||||
|
title = 'Aviation International News'
|
||||||
|
__author__ = 'Jose Ortiz'
|
||||||
|
description = ('Aviation International News covers all sectors of the aviation'
|
||||||
|
' industry, from business aviation to air transport to defense and'
|
||||||
|
' unmanned aerial vehicles.')
|
||||||
|
language = 'en'
|
||||||
|
encoding = 'utf-8'
|
||||||
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
|
masthead_url = 'https://www.ainonline.com/sites/ainonline.com/themes/ain30/images/ainlogo-small.jpg'
|
||||||
|
keep_only_tags=[classes('main-content')]
|
||||||
|
remove_tags = [
|
||||||
|
dict(name=['button','input']),
|
||||||
|
dict(attrs={'class': lambda x: x and 'comments' in x})
|
||||||
|
]
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
|
||||||
|
soup = self.index_to_soup(INDEX)
|
||||||
|
|
||||||
|
# css selectors for articles
|
||||||
|
# .view-content [class *= 'featured-story']
|
||||||
|
# .view-content .views-row
|
||||||
|
article_attrs = {
|
||||||
|
'class': lambda x: x and (
|
||||||
|
'featured-story' in x
|
||||||
|
or frozenset(['views-row']).intersection(
|
||||||
|
frozenset(x.split())))}
|
||||||
|
|
||||||
|
ans = []
|
||||||
|
|
||||||
|
for section in soup.findAll(**classes('view-content')):
|
||||||
|
|
||||||
|
if section.findParent(
|
||||||
|
attrs=dict(id='featured')) is not None:
|
||||||
|
current_section = 'Featured'
|
||||||
|
elif section.findParent(
|
||||||
|
attrs=dict(
|
||||||
|
id='home-top-stories')) is not None:
|
||||||
|
current_section = 'Top Stories'
|
||||||
|
elif section.findParent(
|
||||||
|
attrs=dict(
|
||||||
|
id='quicktabs-container-latest_trending'
|
||||||
|
)) is not None:
|
||||||
|
current_section = 'Latest/Trending'
|
||||||
|
else:
|
||||||
|
current_section = 'Articles'
|
||||||
|
|
||||||
|
articles = []
|
||||||
|
for div in section.findAll(attrs=article_attrs):
|
||||||
|
if frozenset(['views-row']).intersection(
|
||||||
|
frozenset(div['class'].split())):
|
||||||
|
a = div.find(**classes('title')).a
|
||||||
|
elif 'featured-story' in div['class']:
|
||||||
|
a = div.find(
|
||||||
|
lambda tag: tag.name == 'a'
|
||||||
|
and tag.find(['h1','h2','h3','h4','h5','h6'])
|
||||||
|
is not None)
|
||||||
|
title = self.tag_to_string(a)
|
||||||
|
url = absurl(a['href'])
|
||||||
|
desc = ''
|
||||||
|
r = div.find(**classes('teaser'))
|
||||||
|
if r is not None:
|
||||||
|
desc = self.tag_to_string(r)
|
||||||
|
articles.append(
|
||||||
|
{'title': title, 'url': url, 'description': desc})
|
||||||
|
if articles:
|
||||||
|
for title, articles_ in ans:
|
||||||
|
if current_section == title:
|
||||||
|
articles_.extend(articles)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
ans.append((current_section, articles))
|
||||||
|
|
||||||
|
self.log(pformat(ans))
|
||||||
|
return ans
|
Loading…
x
Reference in New Issue
Block a user