Fixup AINOnline for beautifulsoup 4

2025-07-09 03:04:10 -04:00 · 2019-06-12 13:25:04 +05:30 · 2019-06-12 13:25:04 +05:30 · eafb79aaca
commit eafb79aaca
parent ea74df97ec
1 changed files with 29 additions and 30 deletions
--- a/recipes/ainonline.recipe
+++ b/recipes/ainonline.recipe
@@ -2,12 +2,10 @@
 # vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2019, Jose Ortiz <jlortiz84 at gmail.com>
-from __future__ import (unicode_literals, division, absolute_import,
+from __future__ import (unicode_literals, division, absolute_import, print_function)
                        print_function)
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from pprint import pformat
 INDEX = 'https://www.ainonline.com/'
@@ -19,24 +17,27 @@ def absurl(url):
 def classes(classes):
    q = frozenset(classes.split(' '))
-    return dict(attrs={
+    return dict(
-        'class': lambda x: x and frozenset(x.split()).intersection(q)})
+        attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}
    )
 class AINOnline(BasicNewsRecipe):
    title = 'Aviation International News'
    __author__ = 'Jose Ortiz'
-    description = ('Aviation International News covers all sectors of the aviation'
+    description = (
-                   ' industry, from business aviation to air transport to defense and'
+        'Aviation International News covers all sectors of the aviation'
-                   ' unmanned aerial vehicles.')
+        ' industry, from business aviation to air transport to defense and'
        ' unmanned aerial vehicles.'
    )
    language = 'en'
    encoding = 'utf-8'
    no_stylesheets = True
    remove_javascript = True
    masthead_url = 'https://www.ainonline.com/sites/ainonline.com/themes/ain30/images/ainlogo-small.jpg'
-    keep_only_tags=[classes('main-content')]
+    keep_only_tags = [classes('main-content')]
    remove_tags = [
-        dict(name=['button','input']),
+        dict(name=['button', 'input']),
        dict(attrs={'class': lambda x: x and 'comments' in x})
    ]
@@ -48,50 +49,48 @@ class AINOnline(BasicNewsRecipe):
        #     .view-content [class *= 'featured-story']
        #     .view-content .views-row
        article_attrs = {
-            'class': lambda x: x and (
+            'class':
-                'featured-story' in x
+            lambda x: x and (
-                or frozenset(['views-row']).intersection(
+                'featured-story' in x or frozenset(['views-row']).
-                    frozenset(x.split())))}
+                intersection(frozenset(x.split()))
            )
        }
        ans = []
        for section in soup.findAll(**classes('view-content')):
-            if section.findParent(
+            if section.findParent(attrs=dict(id='featured')) is not None:
                    attrs=dict(id='featured')) is not None:
                current_section = 'Featured'
-            elif section.findParent(
+            elif section.findParent(attrs=dict(id='home-top-stories')) is not None:
                    attrs=dict(
                        id='home-top-stories')) is not None:
                current_section = 'Top Stories'
            elif section.findParent(
-                    attrs=dict(
+                attrs=dict(id='quicktabs-container-latest_trending')
-                        id='quicktabs-container-latest_trending'
+            ) is not None:
                    )) is not None:
                current_section = 'Latest/Trending'
            else:
                current_section = 'Articles'
            articles = []
            for div in section.findAll(attrs=article_attrs):
-                if frozenset(['views-row']).intersection(
+                if frozenset(['views-row']).intersection(frozenset(div['class'])):
                        frozenset(div['class'].split())):
                    a = div.find(**classes('title')).a
                elif 'featured-story' in div['class']:
                    a = div.find(
-                        lambda tag: tag.name == 'a'
+                        lambda tag: tag.name == 'a' and tag.
-                        and tag.find(['h1','h2','h3','h4','h5','h6'])
+                        find(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']) is not None
-                        is not None)
+                    )
                else:
                    continue
                title = self.tag_to_string(a)
                url = absurl(a['href'])
                desc = ''
                r = div.find(**classes('teaser'))
                if r is not None:
                    desc = self.tag_to_string(r)
-                articles.append(
+                articles.append({'title': title, 'url': url, 'description': desc})
                    {'title': title, 'url': url, 'description': desc})
            if articles:
-                for title, articles_  in ans:
+                for title, articles_ in ans:
                    if current_section == title:
                        articles_.extend(articles)
                        break