Handle direct querying of class attribute across BS versions

Kovid Goyal 2019-03-23 21:47:58 +05:30
parent 3045dc3c71
commit 8813a31a38
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
25 changed files with 41 additions and 34 deletions
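
BeautifulSoup 3 returns a tag's class attribute as a plain string, while BeautifulSoup 4 returns a list of class names, so direct comparisons such as div['class'] == 'story' behave differently depending on which version a recipe runs against. The recipes below switch to ''.join(tag['class']), which yields the same string under either API: joining the characters of a string reproduces the string, and joining a one-element list yields its element. A minimal sketch of the idiom (not part of the commit; assumes the bs4 package is installed):

    # BS4: the multi-valued 'class' attribute comes back as a list
    from bs4 import BeautifulSoup

    soup = BeautifulSoup('<div class="story">x</div>', 'html.parser')
    div = soup.find('div')

    print(div['class'])                       # ['story'] under BS4; BS3 gives 'story'
    print(''.join(div['class']))              # 'story' under both versions
    print(''.join(div['class']) == 'story')   # True either way

Note that for a tag carrying several classes the join concatenates them without separators (class="story headline" becomes 'storyheadline' under BS4), which is why recipes that need to distinguish multi-class tags, such as the Atlantic ones below, normalise to a list instead.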

View File

@@ -198,12 +198,12 @@ A reasonably complex real life example that exposes more of the :term:`API` of `
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline']}):
-if div['class'] == 'section-headline':
+if ''.join(div['class']) == 'section-headline':
key = string.capwords(feed_title(div))
articles[key] = []
ans.append(key)
-elif div['class'] in ['story', 'story headline']:
+elif ''.join(div['class']) in ['story', 'story headline']:
a = div.find('a', href=True)
if not a:
continue

View File

@@ -44,7 +44,7 @@ class E1843(BasicNewsRecipe):
current_section = articles = None
for div in soup.findAll(**classes('field-name-field-header node-article')):
-if 'field-header' in div['class']:
+if 'field-header' in ''.join(div['class']):
if current_section and articles:
ans.append((current_section, articles))
current_section = self.tag_to_string(div)

View File

@@ -41,7 +41,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
'description': article_desc,
'url': article_url})
# Avoid including the multimedia stuff.
-if entry['class'].find('last') != -1:
+if ''.join(entry['class']).find('last') != -1:
break
return articles
@@ -86,7 +86,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
'description': article_desc,
'url': article_url})
# Avoid including the multimedia stuff.
-if entry['class'].find('last') != -1:
+if ''.join(entry['class']).find('last') != -1:
break
return articles

View File

@@ -128,13 +128,16 @@ class TheAtlantic(BasicNewsRecipe):
feeds = []
for div in soup.findAll('div', attrs={'class': lambda x: x and set(x.split()).intersection({'top-sections', 'bottom-sections'})}):
for h2 in div.findAll('h2', attrs={'class': True}):
-if 'section-name' in h2['class'].split():
+cls = h2['class']
+if hasattr(cls, 'split'):
+cls = cls.split()
+if 'section-name' in cls:
if current_articles:
feeds.append((current_section, current_articles))
current_articles = []
current_section = self.tag_to_string(h2)
self.log('\nFound section:', current_section)
-elif 'hed' in h2['class'].split():
+elif 'hed' in cls:
title = self.tag_to_string(h2)
a = h2.findParent('a', href=True)
url = a['href']
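
This Atlantic hunk (and the identical one in the next file) takes a different route from the join idiom: it normalises the attribute to a list before the membership test, which also copes with tags that carry several classes. A minimal sketch of that pattern, with class_list as a hypothetical helper name (the recipe inlines the logic):

    def class_list(tag):
        # BS3 yields a space-separated string, BS4 already yields a list;
        # normalise both to a list of class names.
        cls = tag.get('class') or []
        if hasattr(cls, 'split'):
            cls = cls.split()
        return cls

    # 'section-name' in class_list(h2) then matches class="section-name hed"
    # under either BeautifulSoup version.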

View File

@@ -128,13 +128,16 @@ class TheAtlantic(BasicNewsRecipe):
feeds = []
for div in soup.findAll('div', attrs={'class': lambda x: x and set(x.split()).intersection({'top-sections', 'bottom-sections'})}):
for h2 in div.findAll('h2', attrs={'class': True}):
-if 'section-name' in h2['class'].split():
+cls = h2['class']
+if hasattr(cls, 'split'):
+cls = cls.split()
+if 'section-name' in cls:
if current_articles:
feeds.append((current_section, current_articles))
current_articles = []
current_section = self.tag_to_string(h2)
self.log('\nFound section:', current_section)
-elif 'hed' in h2['class'].split():
+elif 'hed' in cls:
title = self.tag_to_string(h2)
a = h2.findParent('a', href=True)
url = a['href']

View File

@@ -280,7 +280,7 @@ class CanWestPaper(BasicNewsRecipe):
if dtag is not None:
stag = dtag.span
if stag is not None:
-if stag['class'] != 'timestamp':
+if ''.join(stag['class']) != 'timestamp':
description = self.tag_to_string(stag, False)
else:
description = self.tag_to_string(dtag, False)

View File

@@ -102,13 +102,13 @@ class CIO_Magazine(BasicNewsRecipe):
for div in soup.findAll(True,
attrs={'class': ['heading', 'issue_item']}):
-if div['class'] == 'heading':
+if ''.join(div['class']) == 'heading':
key = string.capwords(self.tag_to_string(div.span))
print("Key: ", key) # Esto es para depurar
articles[key] = []
feeds.append(key)
-elif div['class'] == 'issue_item':
+elif ''.join(div['class']) == 'issue_item':
a = div.find('a', href=True)
if not a:
continue

View File

@@ -280,7 +280,7 @@ class CanWestPaper(BasicNewsRecipe):
if dtag is not None:
stag = dtag.span
if stag is not None:
-if stag['class'] != 'timestamp':
+if ''.join(stag['class']) != 'timestamp':
description = self.tag_to_string(stag, False)
else:
description = self.tag_to_string(dtag, False)

View File

@@ -79,7 +79,7 @@ class Esensja(BasicNewsRecipe):
section += ' - ' + subchapter
feeds.append((section, articles))
articles = []
-if tag['class'] == 'chapter':
+if ''.join(tag['class']) == 'chapter':
chapter = self.tag_to_string(tag).capitalize()
subchapter = ''
else:

View File

@@ -31,7 +31,7 @@ class KopalniaWiedzy(BasicNewsRecipe):
(re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
lambda match: '<img class="topimage" ' + match.group(1) + '>'),
(re.compile(u'<br /><br />'),
-lambda match: '<br\/>')
+lambda match: '<br/>')
]
feeds = [
@@ -44,7 +44,7 @@ class KopalniaWiedzy(BasicNewsRecipe):
]
def is_link_wanted(self, url, tag):
-return tag['class'] == 'next'
+return ''.join(tag['class']) == 'next'
def remove_beyond(self, tag, next):
while tag is not None and getattr(tag, 'name', None) != 'body':

View File

@@ -151,7 +151,7 @@ class LentaRURecipe(BasicNewsRecipe):
for date in dates:
for string in date:
parent = date.parent
-if (parent and isinstance(parent, Tag) and 'div' == parent.name and 'dt' == parent['class']):
+if (parent and isinstance(parent, Tag) and 'div' == parent.name and 'dt' == ''.join(parent['class'])):
# Date div found
parent.extract()
parent[

View File

@@ -80,7 +80,7 @@ class LetsGetCritical(BasicNewsRecipe):
p = post.previousSibling
# navigate up sibling to find date
while p:
-if hasattr(p, 'class') and p['class'] == 'singledate':
+if ''.join(p.get('class') or '') == 'singledate':
date = self.tag_to_string(p)
break
p = p.previousSibling

View File

@@ -95,15 +95,16 @@ class WeeklyLWN(BasicNewsRecipe):
break
text = self.tag_to_string(curr.contents[0])
+cclass = ''.join(curr['class'])
-if 'Cat2HL' in curr['class']:
+if 'Cat2HL' in cclass:
subsection = text
-elif 'Cat1HL' in curr['class']:
+elif 'Cat1HL' in cclass:
section = text
subsection = None
-elif 'SummaryHL' in curr['class']:
+elif 'SummaryHL' in cclass:
article_title = text
if not article_title:
article_title = _('Undefined article title')
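
A nuance of the LWN change: under BS4 the old test 'Cat2HL' in curr['class'] was an exact membership check against the list of class names, while the new test against the joined string is a substring check. That is harmless here because the section class names do not collide as substrings, but the semantics differ, as this small BS4-style sketch (with a made-up class list) shows:

    classes = ['Cat2HL', 'lwn-summary']   # hypothetical BS4 value of curr['class']

    print('Cat2HL' in classes)            # True: exact membership in the list
    print('Cat2HL' in ''.join(classes))   # True: substring of 'Cat2HLlwn-summary'
    print('HL' in classes)                # False: not one of the class names
    print('HL' in ''.join(classes))       # True: but it is a substring of the join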

View File

@@ -60,7 +60,7 @@ class Mediapart(BasicNewsRecipe):
try:
title = article.find('h3', recursive=False)
-if title is None or title['class'] == 'title-specific':
+if title is None or ''.join(title['class']) == 'title-specific':
continue
# print "found fil ",title

View File

@@ -51,7 +51,7 @@ class MoneyControlRecipe(BasicNewsRecipe):
freshSoup.body.append(h1)
for p in soup.findAll('p', attrs={'class': True}):
-if p['class'] == 'MsoNormal':
+if ''.join(p['class']) == 'MsoNormal':
# We have some weird pagebreak marker here; it will not find all of them however
continue

View File

@@ -280,7 +280,7 @@ class CanWestPaper(BasicNewsRecipe):
if dtag is not None:
stag = dtag.span
if stag is not None:
-if stag['class'] != 'timestamp':
+if ''.join(stag['class']) != 'timestamp':
description = self.tag_to_string(stag, False)
else:
description = self.tag_to_string(dtag, False)

View File

@@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
def find_header(tag):
-return tag.name == 'header' and tag.parent['class'] == 'article'
+return tag.name == 'header' and ''.join(tag.parent['class']) == 'article'
def absurl(url):

View File

@@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
def find_header(tag):
-return tag.name == 'header' and tag.parent['class'] == 'article'
+return tag.name == 'header' and ''.join(tag.parent['class']) == 'article'
def absurl(url):

View File

@@ -280,7 +280,7 @@ class CanWestPaper(BasicNewsRecipe):
if dtag is not None:
stag = dtag.span
if stag is not None:
-if stag['class'] != 'timestamp':
+if ''.join(stag['class']) != 'timestamp':
description = self.tag_to_string(stag, False)
else:
description = self.tag_to_string(dtag, False)

View File

@@ -47,12 +47,12 @@ class Polter(BasicNewsRecipe):
for s in soup.findAll(style=True):
if 'bold;' in s['style']:
if s.get('class', ''):
-s['class'] = s['class'] + ' p_title'
+s['class'] = ''.join(s['class']) + ' p_title'
else:
s['class'] = 'p_title'
if 'italic;' in s['style']:
if s.get('class', ''):
-s['class'] = s['class'] + ' italic'
+s['class'] = ''.join(s['class']) + ' italic'
else:
s['class'] = 'italic'
del s['style']
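
The Polter hunk writes the attribute back rather than only reading it. Under BeautifulSoup 4, s['class'] is a list, so concatenating a string onto it directly raises TypeError; joining first flattens the value into a string that can be extended. A minimal sketch, assuming BS4 and a made-up snippet of markup:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup('<span class="lead" style="font-weight: bold;">x</span>', 'html.parser')
    s = soup.find('span')

    # s['class'] + ' p_title' would be list + str under BS4 and fail,
    # so flatten the existing value to a string before appending:
    s['class'] = ''.join(s['class']) + ' p_title'
    print(s['class'])                     # 'lead p_title'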

View File

@@ -180,7 +180,7 @@ class CanWestPaper(BasicNewsRecipe):
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
-if divtag['class'].startswith('section_title'):
+if 'section_title' in ''.join(divtag['class']):
# div contains section title
if not divtag.h3:
continue

View File

@@ -180,7 +180,7 @@ class CanWestPaper(BasicNewsRecipe):
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
-if divtag['class'].startswith('section_title'):
+if ''.join(divtag['class']).startswith('section_title'):
# div contains section title
if not divtag.h3:
continue

View File

@@ -281,7 +281,7 @@ class CanWestPaper(BasicNewsRecipe):
if dtag is not None:
stag = dtag.span
if stag is not None:
-if stag['class'] != 'timestamp':
+if ''.join(stag['class']) != 'timestamp':
description = self.tag_to_string(stag, False)
else:
description = self.tag_to_string(dtag, False)

View File

@@ -70,7 +70,7 @@ class CanWestPaper(BasicNewsRecipe):
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
-if divtag['class'].startswith('section_title'):
+if 'section_title' in ''.join(divtag['class']):
# div contains section title
if not divtag.h3:
continue

View File

@@ -181,7 +181,7 @@ class CanWestPaper(BasicNewsRecipe):
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
# self.log(" div class = %s" % divtag['class'])
-if divtag['class'].startswith('section_title'):
+if ''.join(divtag['class']).startswith('section_title'):
# div contains section title
if not divtag.h3:
continue