diff --git a/manual/news.rst b/manual/news.rst index e7f132f7de..5b56408d79 100644 --- a/manual/news.rst +++ b/manual/news.rst @@ -198,12 +198,12 @@ A reasonably complex real life example that exposes more of the :term:`API` of ` for div in soup.findAll(True, attrs={'class':['section-headline', 'story', 'story headline']}): - if div['class'] == 'section-headline': + if ''.join(div['class']) == 'section-headline': key = string.capwords(feed_title(div)) articles[key] = [] ans.append(key) - elif div['class'] in ['story', 'story headline']: + elif ''.join(div['class']) in ['story', 'story headline']: a = div.find('a', href=True) if not a: continue diff --git a/recipes/1843.recipe b/recipes/1843.recipe index 88dedbc825..44a1bbe40a 100644 --- a/recipes/1843.recipe +++ b/recipes/1843.recipe @@ -44,7 +44,7 @@ class E1843(BasicNewsRecipe): current_section = articles = None for div in soup.findAll(**classes('field-name-field-header node-article')): - if 'field-header' in div['class']: + if 'field-header' in ''.join(div['class']): if current_section and articles: ans.append((current_section, articles)) current_section = self.tag_to_string(div) diff --git a/recipes/animal_politico.recipe b/recipes/animal_politico.recipe index 61674548c6..16315e9137 100644 --- a/recipes/animal_politico.recipe +++ b/recipes/animal_politico.recipe @@ -41,7 +41,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe): 'description': article_desc, 'url': article_url}) # Avoid including the multimedia stuff. - if entry['class'].find('last') != -1: + if ''.join(entry['class']).find('last') != -1: break return articles @@ -86,7 +86,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe): 'description': article_desc, 'url': article_url}) # Avoid including the multimedia stuff. - if entry['class'].find('last') != -1: + if ''.join(entry['class']).find('last') != -1: break return articles diff --git a/recipes/atlantic.recipe b/recipes/atlantic.recipe index 463b67bbe8..cc52dfe59e 100644 --- a/recipes/atlantic.recipe +++ b/recipes/atlantic.recipe @@ -128,13 +128,16 @@ class TheAtlantic(BasicNewsRecipe): feeds = [] for div in soup.findAll('div', attrs={'class': lambda x: x and set(x.split()).intersection({'top-sections', 'bottom-sections'})}): for h2 in div.findAll('h2', attrs={'class': True}): - if 'section-name' in h2['class'].split(): + cls = h2['class'] + if hasattr(cls, 'split'): + cls = cls.split() + if 'section-name' in cls: if current_articles: feeds.append((current_section, current_articles)) current_articles = [] current_section = self.tag_to_string(h2) self.log('\nFound section:', current_section) - elif 'hed' in h2['class'].split(): + elif 'hed' in cls: title = self.tag_to_string(h2) a = h2.findParent('a', href=True) url = a['href'] diff --git a/recipes/atlantic_com.recipe b/recipes/atlantic_com.recipe index 16ad344cd8..e7d9f5f337 100644 --- a/recipes/atlantic_com.recipe +++ b/recipes/atlantic_com.recipe @@ -128,13 +128,16 @@ class TheAtlantic(BasicNewsRecipe): feeds = [] for div in soup.findAll('div', attrs={'class': lambda x: x and set(x.split()).intersection({'top-sections', 'bottom-sections'})}): for h2 in div.findAll('h2', attrs={'class': True}): - if 'section-name' in h2['class'].split(): + cls = h2['class'] + if hasattr(cls, 'split'): + cls = cls.split() + if 'section-name' in cls: if current_articles: feeds.append((current_section, current_articles)) current_articles = [] current_section = self.tag_to_string(h2) self.log('\nFound section:', current_section) - elif 'hed' in h2['class'].split(): + elif 'hed' in cls: title = self.tag_to_string(h2) a = h2.findParent('a', href=True) url = a['href'] diff --git a/recipes/calgary_herald.recipe b/recipes/calgary_herald.recipe index a6c9dd74ff..5c24e06ec4 100644 --- a/recipes/calgary_herald.recipe +++ b/recipes/calgary_herald.recipe @@ -280,7 +280,7 @@ class CanWestPaper(BasicNewsRecipe): if dtag is not None: stag = dtag.span if stag is not None: - if stag['class'] != 'timestamp': + if ''.join(stag['class']) != 'timestamp': description = self.tag_to_string(stag, False) else: description = self.tag_to_string(dtag, False) diff --git a/recipes/cio_magazine.recipe b/recipes/cio_magazine.recipe index e2c04f7e0c..6323663684 100644 --- a/recipes/cio_magazine.recipe +++ b/recipes/cio_magazine.recipe @@ -102,13 +102,13 @@ class CIO_Magazine(BasicNewsRecipe): for div in soup.findAll(True, attrs={'class': ['heading', 'issue_item']}): - if div['class'] == 'heading': + if ''.join(div['class']) == 'heading': key = string.capwords(self.tag_to_string(div.span)) print("Key: ", key) # Esto es para depurar articles[key] = [] feeds.append(key) - elif div['class'] == 'issue_item': + elif ''.join(div['class']) == 'issue_item': a = div.find('a', href=True) if not a: continue diff --git a/recipes/edmonton_journal.recipe b/recipes/edmonton_journal.recipe index 40dc953feb..544041c50a 100644 --- a/recipes/edmonton_journal.recipe +++ b/recipes/edmonton_journal.recipe @@ -280,7 +280,7 @@ class CanWestPaper(BasicNewsRecipe): if dtag is not None: stag = dtag.span if stag is not None: - if stag['class'] != 'timestamp': + if ''.join(stag['class']) != 'timestamp': description = self.tag_to_string(stag, False) else: description = self.tag_to_string(dtag, False) diff --git a/recipes/esenja.recipe b/recipes/esenja.recipe index b4e38374f9..b81e9eca10 100644 --- a/recipes/esenja.recipe +++ b/recipes/esenja.recipe @@ -79,7 +79,7 @@ class Esensja(BasicNewsRecipe): section += ' - ' + subchapter feeds.append((section, articles)) articles = [] - if tag['class'] == 'chapter': + if ''.join(tag['class']) == 'chapter': chapter = self.tag_to_string(tag).capitalize() subchapter = '' else: diff --git a/recipes/kopalniawiedzy.recipe b/recipes/kopalniawiedzy.recipe index 308a68be7a..cd708cf095 100644 --- a/recipes/kopalniawiedzy.recipe +++ b/recipes/kopalniawiedzy.recipe @@ -31,7 +31,7 @@ class KopalniaWiedzy(BasicNewsRecipe): (re.compile(u''), lambda match: ''), (re.compile(u'

'), - lambda match: '') + lambda match: '
') ] feeds = [ @@ -44,7 +44,7 @@ class KopalniaWiedzy(BasicNewsRecipe): ] def is_link_wanted(self, url, tag): - return tag['class'] == 'next' + return ''.join(tag['class']) == 'next' def remove_beyond(self, tag, next): while tag is not None and getattr(tag, 'name', None) != 'body': diff --git a/recipes/lenta_ru.recipe b/recipes/lenta_ru.recipe index a680013cae..4072498c9a 100644 --- a/recipes/lenta_ru.recipe +++ b/recipes/lenta_ru.recipe @@ -151,7 +151,7 @@ class LentaRURecipe(BasicNewsRecipe): for date in dates: for string in date: parent = date.parent - if (parent and isinstance(parent, Tag) and 'div' == parent.name and 'dt' == parent['class']): + if (parent and isinstance(parent, Tag) and 'div' == parent.name and 'dt' == ''.join(parent['class'])): # Date div found parent.extract() parent[ diff --git a/recipes/letsgetcritical.recipe b/recipes/letsgetcritical.recipe index 8124f0c1c5..0e41e1de32 100644 --- a/recipes/letsgetcritical.recipe +++ b/recipes/letsgetcritical.recipe @@ -80,7 +80,7 @@ class LetsGetCritical(BasicNewsRecipe): p = post.previousSibling # navigate up sibling to find date while p: - if hasattr(p, 'class') and p['class'] == 'singledate': + if ''.join(p.get('class') or '') == 'singledate': date = self.tag_to_string(p) break p = p.previousSibling diff --git a/recipes/lwn_weekly.recipe b/recipes/lwn_weekly.recipe index 39da52ef98..e305cb0714 100644 --- a/recipes/lwn_weekly.recipe +++ b/recipes/lwn_weekly.recipe @@ -95,15 +95,16 @@ class WeeklyLWN(BasicNewsRecipe): break text = self.tag_to_string(curr.contents[0]) + cclass = ''.join(curr['class']) - if 'Cat2HL' in curr['class']: + if 'Cat2HL' in cclass: subsection = text - elif 'Cat1HL' in curr['class']: + elif 'Cat1HL' in cclass: section = text subsection = None - elif 'SummaryHL' in curr['class']: + elif 'SummaryHL' in cclass: article_title = text if not article_title: article_title = _('Undefined article title') diff --git a/recipes/mediapart.recipe b/recipes/mediapart.recipe index f9ca833f58..113c0d00fe 100644 --- a/recipes/mediapart.recipe +++ b/recipes/mediapart.recipe @@ -60,7 +60,7 @@ class Mediapart(BasicNewsRecipe): try: title = article.find('h3', recursive=False) - if title is None or title['class'] == 'title-specific': + if title is None or ''.join(title['class']) == 'title-specific': continue # print "found fil ",title diff --git a/recipes/moneycontrol.recipe b/recipes/moneycontrol.recipe index 1c893e6d44..3c8f0483ea 100644 --- a/recipes/moneycontrol.recipe +++ b/recipes/moneycontrol.recipe @@ -51,7 +51,7 @@ class MoneyControlRecipe(BasicNewsRecipe): freshSoup.body.append(h1) for p in soup.findAll('p', attrs={'class': true}): - if p['class'] == 'MsoNormal': + if ''.join(p['class']) == 'MsoNormal': # We have some weird pagebreak marker here; it will not find all of them however continue diff --git a/recipes/montreal_gazette.recipe b/recipes/montreal_gazette.recipe index f5e7d6dccb..6fa7f878e4 100644 --- a/recipes/montreal_gazette.recipe +++ b/recipes/montreal_gazette.recipe @@ -280,7 +280,7 @@ class CanWestPaper(BasicNewsRecipe): if dtag is not None: stag = dtag.span if stag is not None: - if stag['class'] != 'timestamp': + if ''.join(stag['class']) != 'timestamp': description = self.tag_to_string(stag, False) else: description = self.tag_to_string(dtag, False) diff --git a/recipes/new_york_review_of_books.recipe b/recipes/new_york_review_of_books.recipe index 280daed696..3fbbf9796c 100644 --- a/recipes/new_york_review_of_books.recipe +++ b/recipes/new_york_review_of_books.recipe @@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe def find_header(tag): - return tag.name == 'header' and tag.parent['class'] == 'article' + return tag.name == 'header' and ''.join(tag.parent['class']) == 'article' def absurl(url): diff --git a/recipes/new_york_review_of_books_no_sub.recipe b/recipes/new_york_review_of_books_no_sub.recipe index d76d265ffa..3c6a4ad5a6 100644 --- a/recipes/new_york_review_of_books_no_sub.recipe +++ b/recipes/new_york_review_of_books_no_sub.recipe @@ -12,7 +12,7 @@ from calibre.web.feeds.news import BasicNewsRecipe def find_header(tag): - return tag.name == 'header' and tag.parent['class'] == 'article' + return tag.name == 'header' and ''.join(tag.parent['class']) == 'article' def absurl(url): diff --git a/recipes/ottawa_citizen.recipe b/recipes/ottawa_citizen.recipe index 43762f067d..a157fdb181 100644 --- a/recipes/ottawa_citizen.recipe +++ b/recipes/ottawa_citizen.recipe @@ -280,7 +280,7 @@ class CanWestPaper(BasicNewsRecipe): if dtag is not None: stag = dtag.span if stag is not None: - if stag['class'] != 'timestamp': + if ''.join(stag['class']) != 'timestamp': description = self.tag_to_string(stag, False) else: description = self.tag_to_string(dtag, False) diff --git a/recipes/polter_pl.recipe b/recipes/polter_pl.recipe index 740a0f7ca5..db0fe58b77 100644 --- a/recipes/polter_pl.recipe +++ b/recipes/polter_pl.recipe @@ -47,12 +47,12 @@ class Polter(BasicNewsRecipe): for s in soup.findAll(style=True): if 'bold;' in s['style']: if s.get('class', ''): - s['class'] = s['class'] + ' p_title' + s['class'] = ''.join(s['class']) + ' p_title' else: s['class'] = 'p_title' if 'italic;' in s['style']: if s.get('class', ''): - s['class'] = s['class'] + ' italic' + s['class'] = ''.join(s['class']) + ' italic' else: s['class'] = 'italic' del s['style'] diff --git a/recipes/regina_leader_post.recipe b/recipes/regina_leader_post.recipe index 122e6a7cdd..b226697fc2 100644 --- a/recipes/regina_leader_post.recipe +++ b/recipes/regina_leader_post.recipe @@ -180,7 +180,7 @@ class CanWestPaper(BasicNewsRecipe): # Find each instance of class="sectiontitle", class="featurecontent" for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}): - if divtag['class'].startswith('section_title'): + if 'section_title' in ''.join(divtag['class']): # div contains section title if not divtag.h3: continue diff --git a/recipes/saskatoon_star_phoenix.recipe b/recipes/saskatoon_star_phoenix.recipe index 23602e93bd..f69e358e01 100644 --- a/recipes/saskatoon_star_phoenix.recipe +++ b/recipes/saskatoon_star_phoenix.recipe @@ -180,7 +180,7 @@ class CanWestPaper(BasicNewsRecipe): # Find each instance of class="sectiontitle", class="featurecontent" for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}): - if divtag['class'].startswith('section_title'): + if ''.join(divtag['class']).startswith('section_title'): # div contains section title if not divtag.h3: continue diff --git a/recipes/vancouver_sun.recipe b/recipes/vancouver_sun.recipe index 92acc06e90..cd1c4a0e48 100644 --- a/recipes/vancouver_sun.recipe +++ b/recipes/vancouver_sun.recipe @@ -281,7 +281,7 @@ class CanWestPaper(BasicNewsRecipe): if dtag is not None: stag = dtag.span if stag is not None: - if stag['class'] != 'timestamp': + if ''.join(stag['class']) != 'timestamp': description = self.tag_to_string(stag, False) else: description = self.tag_to_string(dtag, False) diff --git a/recipes/windows_star.recipe b/recipes/windows_star.recipe index d66e4cd9c1..fd17918911 100644 --- a/recipes/windows_star.recipe +++ b/recipes/windows_star.recipe @@ -70,7 +70,7 @@ class CanWestPaper(BasicNewsRecipe): # Find each instance of class="sectiontitle", class="featurecontent" for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}): - if divtag['class'].startswith('section_title'): + if 'section_title' in ''.join(divtag['class']): # div contains section title if not divtag.h3: continue diff --git a/recipes/windsor_star.recipe b/recipes/windsor_star.recipe index e02c4f507b..acd3068a74 100644 --- a/recipes/windsor_star.recipe +++ b/recipes/windsor_star.recipe @@ -181,7 +181,7 @@ class CanWestPaper(BasicNewsRecipe): # Find each instance of class="sectiontitle", class="featurecontent" for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}): # self.log(" div class = %s" % divtag['class']) - if divtag['class'].startswith('section_title'): + if ''.join(divtag['class']).startswith('section_title'): # div contains section title if not divtag.h3: continue