diff --git a/manual/news.rst b/manual/news.rst index 1ea813f20a..a0c80defdf 100644 --- a/manual/news.rst +++ b/manual/news.rst @@ -217,7 +217,7 @@ A reasonably complex real life example that exposes more of the :term:`API` of ` description = self.tag_to_string(summary, use_alt=False) feed = key if key is not None else 'Uncategorized' - if not articles.has_key(feed): + if feed not in articles: articles[feed] = [] if not 'podcasts' in url: articles[feed].append( @@ -225,7 +225,7 @@ A reasonably complex real life example that exposes more of the :term:`API` of ` description=description, content='')) ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2}) - ans = [(key, articles[key]) for key in ans if articles.has_key(key)] + ans = [(key, articles[key]) for key in ans if key in articles] return ans def preprocess_html(self, soup): diff --git a/recipes/20minutos.recipe b/recipes/20minutos.recipe index 725278c045..e40fc174fa 100644 --- a/recipes/20minutos.recipe +++ b/recipes/20minutos.recipe @@ -60,7 +60,6 @@ class t20Minutos(BasicNewsRecipe): else: str = self.tag_to_string(item) item.replaceWith(str) - for item in soup.findAll('img'): - if not item.has_key('alt'): # noqa - item['alt'] = 'image' + for item in soup.findAll('img', alt=False): + item['alt'] = 'image' return soup diff --git a/recipes/ajc.recipe b/recipes/ajc.recipe index f27a2fc8e1..0b884373b7 100644 --- a/recipes/ajc.recipe +++ b/recipes/ajc.recipe @@ -112,7 +112,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): for div in soup.findAll('div', attrs={'class': re.compile(self.author_reg_exp, re.IGNORECASE)}): div.extract() for auth in div.findAll('a'): - if (auth.has_key('class') and auth['class'] == 'cm-source-image'): # noqa + if auth.get('class') == 'cm-source-image': continue names = names + comma + auth.contents[0] comma = ', ' diff --git a/recipes/animal_politico.recipe b/recipes/animal_politico.recipe index adbd6d7b3a..61674548c6 100644 --- 
a/recipes/animal_politico.recipe +++ b/recipes/animal_politico.recipe @@ -25,7 +25,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe): def generic_parse(self, soup): articles = [] # soup.findAll('li', 'hentry'): - for entry in soup.findAll(lambda tag: tag.name == 'li' and tag.has_key('class') and tag['class'].find('hentry') != -1): # noqa + for entry in soup.findAll('li', attrs={'class': lambda x: x and 'hentry' in x}): article_url = entry.a['href'] + '?print=yes' article_title = entry.find('h3', 'entry-title') article_title = self.tag_to_string(article_title) @@ -48,7 +48,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe): def plumaje_parse(self, soup): articles = [] - blogs_soup = soup.find(lambda tag: tag.name == 'ul' and tag.has_key('class') and tag['class'].find('bloglist-fecha') != -1) # noqa + blogs_soup = soup.find('ul', attrs={'class': lambda x: x and 'bloglist-fecha' in x}) for entry in blogs_soup.findAll('li'): article_title = entry.p article_url = article_title.a['href'] + '?print=yes' @@ -69,7 +69,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe): def boca_parse(self, soup): articles = [] # soup.findAll('li', 'hentry'): - for entry in soup.findAll(lambda tag: tag.name == 'div' and tag.has_key('class') and tag['class'].find('hentry') != -1): # noqa + for entry in soup.findAll('div', attrs={'class': lambda x: x and 'hentry' in x}): article_title = entry.find('h2', 'entry-title') article_url = article_title.a['href'] + '?print=yes' article_title = self.tag_to_string(article_title) diff --git a/recipes/ba_herald.recipe b/recipes/ba_herald.recipe index f8e2272d80..91cac54aa8 100644 --- a/recipes/ba_herald.recipe +++ b/recipes/ba_herald.recipe @@ -62,8 +62,8 @@ class BuenosAiresHerald(BasicNewsRecipe): soup = self.index_to_soup(feedurl) for item in soup.findAll('div', attrs={'class': 'nota_texto_seccion'}): description = self.tag_to_string(item.h2) - atag = item.h2.find('a') - if atag and atag.has_key('href'): # noqa + atag = 
item.h2.find('a', href=True) + if atag is not None: url = self.INDEX + atag['href'] title = description date = strftime(self.timefmt) diff --git a/recipes/benchmark_pl.recipe b/recipes/benchmark_pl.recipe index 6433742978..83b1b465c9 100644 --- a/recipes/benchmark_pl.recipe +++ b/recipes/benchmark_pl.recipe @@ -50,8 +50,8 @@ class BenchmarkPl(BasicNewsRecipe): def preprocess_html(self, soup): self.append_page(soup, soup.body) - for a in soup('a'): - if a.has_key('href') and not a['href'].startswith('http'): # noqa + for a in soup.findAll('a', href=True): + if not a['href'].startswith('http'): a['href'] = self.INDEX + a['href'] for r in soup.findAll(attrs={'class': ['comments', 'body']}): r.extract() diff --git a/recipes/bighollywood.recipe b/recipes/bighollywood.recipe index 43d30795b5..0f0c06760d 100644 --- a/recipes/bighollywood.recipe +++ b/recipes/bighollywood.recipe @@ -55,7 +55,6 @@ class BigHollywood(BasicNewsRecipe): else: str = self.tag_to_string(item) item.replaceWith(str) - for item in soup.findAll('img'): - if not item.has_key('alt'): # noqa - item['alt'] = 'image' + for item in soup.findAll('img', alt=False): + item['alt'] = 'image' return soup diff --git a/recipes/business_insider.recipe b/recipes/business_insider.recipe index 0720600496..d04913ea17 100644 --- a/recipes/business_insider.recipe +++ b/recipes/business_insider.recipe @@ -59,7 +59,6 @@ class Business_insider(BasicNewsRecipe): if item.string is not None: tstr = item.string item.replaceWith(tstr) - for item in soup.findAll('img'): - if not item.has_key('alt'): # noqa - item['alt'] = 'image' + for item in soup.findAll('img', alt=False): + item['alt'] = 'image' return soup diff --git a/recipes/calgary_herald.recipe b/recipes/calgary_herald.recipe index 02bd202987..d3c4e9d554 100644 --- a/recipes/calgary_herald.recipe +++ b/recipes/calgary_herald.recipe @@ -286,7 +286,7 @@ class CanWestPaper(BasicNewsRecipe): else: description = self.tag_to_string(dtag, False) print("DESCRIPTION: " + 
description) - if not articles.has_key(key): # noqa + if key not in articles: articles[key] = [] articles[key].append(dict( title=title, url=url, date='', description=description, author='', content='')) @@ -310,5 +310,5 @@ class CanWestPaper(BasicNewsRecipe): for (k, url) in self.postmedia_index_pages: parse_web_index(k, url) - ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa + ans = [(key, articles[key]) for key in ans if key in articles] return ans diff --git a/recipes/cd_action.recipe b/recipes/cd_action.recipe index fd1e7862c2..4d132a8830 100644 --- a/recipes/cd_action.recipe +++ b/recipes/cd_action.recipe @@ -22,7 +22,7 @@ class CD_Action(BasicNewsRecipe): return getattr(self, 'cover_url', self.cover_url) def preprocess_html(self, soup): - for a in soup('a'): - if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: # noqa + for a in soup.findAll('a', href=True): + if 'http://' not in a['href'] and 'https://' not in a['href']: a['href'] = self.index + a['href'] return soup diff --git a/recipes/chr_mon.recipe b/recipes/chr_mon.recipe index d2c4519e9f..3ffa429b9a 100644 --- a/recipes/chr_mon.recipe +++ b/recipes/chr_mon.recipe @@ -101,10 +101,7 @@ class CSMonitor(BasicNewsRecipe): else: str = self.tag_to_string(item) item.replaceWith(str) - for item in soup.findAll('img'): + for item in soup.findAll('img', src=True): if 'scorecardresearch' in item['src']: item.extract() - else: - if not item.has_key('alt'): # noqa - item['alt'] = 'image' return soup diff --git a/recipes/cinebel_be.recipe b/recipes/cinebel_be.recipe index 74d5ac1a47..56f8b4d8f0 100644 --- a/recipes/cinebel_be.recipe +++ b/recipes/cinebel_be.recipe @@ -36,10 +36,9 @@ class Cinebel(BasicNewsRecipe): ] def preprocess_html(self, soup): - for alink in soup.findAll('a'): - if alink.has_key('href'): # noqa - tstr = "Site officiel: " + alink['href'] - alink.replaceWith(tstr) + for alink in soup.findAll('a', href=True): + tstr = "Site officiel: " + 
alink['href'] + alink.replaceWith(tstr) return soup def get_cover_url(self): diff --git a/recipes/cio_magazine.recipe b/recipes/cio_magazine.recipe index 5e8c9b39b2..e2c04f7e0c 100644 --- a/recipes/cio_magazine.recipe +++ b/recipes/cio_magazine.recipe @@ -131,12 +131,12 @@ class CIO_Magazine(BasicNewsRecipe): # Esto esta copiado del NY times feed = key if key is not None else 'Uncategorized' - if not articles.has_key(feed): # noqa + if feed not in articles: articles[feed] = [] if 'podcasts' not in url: articles[feed].append( dict(title=title, url=url, date=pubdate, description=description, content='')) - feeds = [(k, articles[k]) for k in feeds if articles.has_key(k)] # noqa + feeds = [(k, articles[k]) for k in feeds if k in articles] return feeds diff --git a/recipes/cnd.recipe b/recipes/cnd.recipe index 0f8e70ca35..320f355e01 100644 --- a/recipes/cnd.recipe +++ b/recipes/cnd.recipe @@ -54,7 +54,7 @@ class TheCND(BasicNewsRecipe): if re.search('cm', date): continue if (date is not None) and len(date) > 2: - if not articles.has_key(date): # noqa + if date not in articles: articles[date] = [] articles[date].append( {'title': title, 'url': url, 'description': '', 'date': ''}) diff --git a/recipes/cnd_weekly.recipe b/recipes/cnd_weekly.recipe index ae748c2e76..7566ec9548 100644 --- a/recipes/cnd_weekly.recipe +++ b/recipes/cnd_weekly.recipe @@ -54,7 +54,7 @@ class TheCND(BasicNewsRecipe): continue self.log('\tFound article: ', title, 'at', url, '@', date) if (date is not None) and len(date) > 2: - if not articles.has_key(date): # noqa + if date not in articles: articles[date] = [] articles[date].append( {'title': title, 'url': url, 'description': '', 'date': ''}) diff --git a/recipes/cosmopolitan.recipe b/recipes/cosmopolitan.recipe index 9b84088764..ab31ab88bb 100644 --- a/recipes/cosmopolitan.recipe +++ b/recipes/cosmopolitan.recipe @@ -53,8 +53,8 @@ class General(BasicNewsRecipe): for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 
'tfoot', 'tbody', 'colgroup', 'col']): item.name = 'div' for attrib in attribs: - if item.has_key(attrib): # noqa - del item[attrib] + item[attrib] = '' + del item[attrib] return soup def get_cover_url(self): diff --git a/recipes/cubadebate.recipe b/recipes/cubadebate.recipe index 2ab9c79c8b..ea87b6688c 100644 --- a/recipes/cubadebate.recipe +++ b/recipes/cubadebate.recipe @@ -46,7 +46,6 @@ class CubaDebate(BasicNewsRecipe): def preprocess_html(self, soup): for item in soup.findAll(style=True): del item['style'] - for item in soup.findAll('img'): - if not item.has_key('alt'): # noqa - item['alt'] = 'image' + for item in soup.findAll('img', alt=False): + item['alt'] = 'image' return soup diff --git a/recipes/dailytportal.recipe b/recipes/dailytportal.recipe index 37b749f8fa..00deb4c1db 100644 --- a/recipes/dailytportal.recipe +++ b/recipes/dailytportal.recipe @@ -57,7 +57,6 @@ class Pagina12(BasicNewsRecipe): else: str = self.tag_to_string(item) item.replaceWith(str) - for item in soup.findAll('img'): - if not item.has_key('alt'): # noqa - item['alt'] = 'image' + for item in soup.findAll('img', alt=False): + item['alt'] = 'image' return soup diff --git a/recipes/degentenaar.recipe b/recipes/degentenaar.recipe index 6b6c52bef0..2c08ee7859 100644 --- a/recipes/degentenaar.recipe +++ b/recipes/degentenaar.recipe @@ -64,7 +64,7 @@ class DeGentenaarOnline(BasicNewsRecipe): del item['style'] for item in soup.findAll('span'): item.name = 'div' - if item.has_key('id') and item['id'] == 'lblArticleTitle': # noqa + if item.get('id') == 'lblArticleTitle': item.name = 'h3' soup.html['lang'] = self.lang diff --git a/recipes/deutsche_welle_bs.recipe b/recipes/deutsche_welle_bs.recipe index 72a3761219..0b852c94c8 100644 --- a/recipes/deutsche_welle_bs.recipe +++ b/recipes/deutsche_welle_bs.recipe @@ -65,8 +65,8 @@ class DeutscheWelle_bs(BasicNewsRecipe): if limg: item.name = 'div' del item['href'] - if item.has_key('target'): # noqa - del item['target'] + item['target'] = '' + del 
item['target'] else: str = self.tag_to_string(item) item.replaceWith(str) diff --git a/recipes/deutsche_welle_hr.recipe b/recipes/deutsche_welle_hr.recipe index 1c1427a8e4..906e4a1d39 100644 --- a/recipes/deutsche_welle_hr.recipe +++ b/recipes/deutsche_welle_hr.recipe @@ -63,8 +63,8 @@ class DeutscheWelle_hr(BasicNewsRecipe): if limg: item.name = 'div' del item['href'] - if item.has_key('target'): # noqa - del item['target'] + item['target'] = '' + del item['target'] else: str = self.tag_to_string(item) item.replaceWith(str) diff --git a/recipes/deutsche_welle_pt.recipe b/recipes/deutsche_welle_pt.recipe index 1fca72963b..4b9a9ea9dc 100644 --- a/recipes/deutsche_welle_pt.recipe +++ b/recipes/deutsche_welle_pt.recipe @@ -54,8 +54,8 @@ class DeutscheWelle_pt(BasicNewsRecipe): if limg: item.name = 'div' del item['href'] - if item.has_key('target'): # noqa - del item['target'] + item['target'] = '' + del item['target'] else: str = self.tag_to_string(item) item.replaceWith(str) diff --git a/recipes/deutsche_welle_sr.recipe b/recipes/deutsche_welle_sr.recipe index 3ea1a2a10f..b9c67e4976 100644 --- a/recipes/deutsche_welle_sr.recipe +++ b/recipes/deutsche_welle_sr.recipe @@ -68,8 +68,8 @@ class DeutscheWelle_sr(BasicNewsRecipe): if limg: item.name = 'div' del item['href'] - if item.has_key('target'): # noqa - del item['target'] + item['target'] = '' + del item['target'] else: str = self.tag_to_string(item) item.replaceWith(str) diff --git a/recipes/dnevnik_cro.recipe b/recipes/dnevnik_cro.recipe index 9060a9185c..02f4a3bcd8 100644 --- a/recipes/dnevnik_cro.recipe +++ b/recipes/dnevnik_cro.recipe @@ -55,8 +55,8 @@ class DnevnikCro(BasicNewsRecipe): for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): item.name = 'div' for attrib in attribs: - if item.has_key(attrib): # noqa - del item[attrib] + item[attrib] = '' + del item[attrib] mlang = Tag(soup, 'meta', [ ("http-equiv", "Content-Language"), ("content", 
self.lang)]) diff --git a/recipes/dobanevinosti.recipe b/recipes/dobanevinosti.recipe index 242edabf46..90cbc5866c 100644 --- a/recipes/dobanevinosti.recipe +++ b/recipes/dobanevinosti.recipe @@ -37,7 +37,6 @@ class DobaNevinosti(BasicNewsRecipe): def preprocess_html(self, soup): for item in soup.findAll(style=True): del item['style'] - for item in soup.findAll('img'): - if not item.has_key('alt'): # noqa - item['alt'] = 'image' + for item in soup.findAll('img', alt=False): + item['alt'] = 'image' return soup diff --git a/recipes/dobreprogamy.recipe b/recipes/dobreprogamy.recipe index 878ceb57c3..c97d9c0e8f 100644 --- a/recipes/dobreprogamy.recipe +++ b/recipes/dobreprogamy.recipe @@ -28,8 +28,8 @@ class Dobreprogramy_pl(BasicNewsRecipe): ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')] def preprocess_html(self, soup): - for a in soup('a'): - if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: # noqa + for a in soup('a', href=True): + if 'http://' not in a['href'] and 'https://' not in a['href']: a['href'] = self.index + a['href'] for r in soup.findAll('iframe'): r.parent.extract() diff --git a/recipes/dzieje_pl.recipe b/recipes/dzieje_pl.recipe index 7a2e9284a6..57d243bd74 100644 --- a/recipes/dzieje_pl.recipe +++ b/recipes/dzieje_pl.recipe @@ -81,8 +81,8 @@ class Dzieje(BasicNewsRecipe): return feeds def preprocess_html(self, soup): - for a in soup('a'): - if a.has_key('href') and not a['href'].startswith('http'): # noqa + for a in soup('a', href=True): + if not a['href'].startswith('http'): a['href'] = self.index + a['href'] self.append_page(soup, soup.body) return soup diff --git a/recipes/edmonton_journal.recipe b/recipes/edmonton_journal.recipe index e8e6ad5a87..9e7ae425bf 100644 --- a/recipes/edmonton_journal.recipe +++ b/recipes/edmonton_journal.recipe @@ -286,7 +286,7 @@ class CanWestPaper(BasicNewsRecipe): else: description = self.tag_to_string(dtag, False) print("DESCRIPTION: " + description) - if 
not articles.has_key(key): # noqa + if key not in articles: articles[key] = [] articles[key].append(dict( title=title, url=url, date='', description=description, author='', content='')) @@ -310,5 +310,5 @@ class CanWestPaper(BasicNewsRecipe): for (k, url) in self.postmedia_index_pages: parse_web_index(k, url) - ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa + ans = [(key, articles[key]) for key in ans if key in articles] return ans diff --git a/recipes/el_diplo.recipe b/recipes/el_diplo.recipe index 119e010eb9..d9fbe755a8 100644 --- a/recipes/el_diplo.recipe +++ b/recipes/el_diplo.recipe @@ -113,11 +113,11 @@ class ElDiplo_Recipe(BasicNewsRecipe): if aut: auth = self.tag_to_string(aut, use_alt=False).strip() - if not articles.has_key(section): # noqa + if section not in articles: articles[section] = [] articles[section].append(dict( title=title, author=auth, url=url, date=None, description=description, content='')) - ans = [(s, articles[s]) for s in ans if articles.has_key(s)] # noqa + ans = [(s, articles[s]) for s in ans if s in articles] return ans diff --git a/recipes/elclubdelebook.recipe b/recipes/elclubdelebook.recipe index 47a7043b3a..f186140210 100644 --- a/recipes/elclubdelebook.recipe +++ b/recipes/elclubdelebook.recipe @@ -53,7 +53,6 @@ class ElClubDelEbook(BasicNewsRecipe): else: str = self.tag_to_string(item) item.replaceWith(str) - for item in soup.findAll('img'): - if not item.has_key('alt'): # noqa - item['alt'] = 'image' + for item in soup.findAll('img', alt=False): + item['alt'] = 'image' return soup diff --git a/recipes/elpais_semanal.recipe b/recipes/elpais_semanal.recipe index 1a607b4858..69a0a0fb42 100644 --- a/recipes/elpais_semanal.recipe +++ b/recipes/elpais_semanal.recipe @@ -35,17 +35,16 @@ class ElPaisSemanal(BasicNewsRecipe): def parse_index(self): articles = [] soup = self.index_to_soup(self.index) - for item in soup.findAll('a', attrs={'class': ['g19i003', 'g17r003', 'g17i003']}): + for item in 
soup.findAll('a', attrs={'class': ['g19i003', 'g17r003', 'g17i003']}, href=True): description = '' title_prefix = '' feed_link = item - if item.has_key('href'): # noqa - url = 'http://www.elpais.com' + item['href'].rpartition('/')[0] - title = title_prefix + self.tag_to_string(feed_link) - date = strftime(self.timefmt) - articles.append({ - 'title': title, 'date': date, 'url': url, 'description': description - }) + url = 'http://www.elpais.com' + item['href'].rpartition('/')[0] + title = title_prefix + self.tag_to_string(feed_link) + date = strftime(self.timefmt) + articles.append({ + 'title': title, 'date': date, 'url': url, 'description': description + }) return [(soup.head.title.string, articles)] def print_version(self, url): diff --git a/recipes/eluniversalimpresa.recipe b/recipes/eluniversalimpresa.recipe index e0cf058494..331da13a86 100644 --- a/recipes/eluniversalimpresa.recipe +++ b/recipes/eluniversalimpresa.recipe @@ -31,7 +31,7 @@ class ElUniversalImpresaRecipe(BasicNewsRecipe): table = soup.find('table', attrs={'width': '500'}) articles = [] - for td in table.findAll(lambda tag: tag.name == 'td' and tag.has_key('class') and tag['class'] == 'arnegro12'): # noqa + for td in table.findAll('td', attrs={'class': 'arnegro12'}): a = td.a a.extract() title = self.tag_to_string(a) @@ -79,8 +79,8 @@ class ElUniversalImpresaRecipe(BasicNewsRecipe): tag = soup.find('font', attrs={'color': '#0F046A'}) if tag: for attr in ['color', 'face', 'helvetica,', 'sans-serif', 'size']: - if tag.has_key(attr): # noqa - del tag[attr] + tag[attr] = '' + del tag[attr] tag.name = 'h1' return soup diff --git a/recipes/espn.recipe b/recipes/espn.recipe index 8c4ce332fa..e29a88b9cb 100644 --- a/recipes/espn.recipe +++ b/recipes/espn.recipe @@ -62,8 +62,8 @@ class ESPN(BasicNewsRecipe): ] def preprocess_html(self, soup): - for div in soup.findAll('div'): - if div.has_key('style') and 'px' in div['style']: # noqa + for div in soup.findAll('div', style=True): + if 'px' in div['style']: 
div['style'] = '' return soup diff --git a/recipes/estadao.recipe b/recipes/estadao.recipe index cbb06247fe..b2f8215528 100644 --- a/recipes/estadao.recipe +++ b/recipes/estadao.recipe @@ -88,7 +88,7 @@ class Estadao(BasicNewsRecipe): def postprocess_html(self, soup, first): # process all the images. assumes that the new html has the correct # path - for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')): # noqa + for tag in soup.findAll('img', src=True): iurl = tag['src'] img = Image() img.open(iurl) diff --git a/recipes/financialsense.recipe b/recipes/financialsense.recipe index 9fcfa17413..fdf45c45b8 100644 --- a/recipes/financialsense.recipe +++ b/recipes/financialsense.recipe @@ -57,7 +57,6 @@ class FinancialSense(BasicNewsRecipe): else: str = self.tag_to_string(item) item.replaceWith(str) - for item in soup.findAll('img'): - if not item.has_key('alt'): # noqa - item['alt'] = 'image' + for item in soup.findAll('img', alt=False): + item['alt'] = 'image' return soup diff --git a/recipes/freeway.recipe b/recipes/freeway.recipe index 3662fac972..9e91929371 100644 --- a/recipes/freeway.recipe +++ b/recipes/freeway.recipe @@ -84,8 +84,8 @@ class General(BasicNewsRecipe): for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): item.name = 'div' for attrib in attribs: - if item.has_key(attrib): # noqa - del item[attrib] + item[attrib] = '' + del item[attrib] return soup def get_cover_url(self): diff --git a/recipes/gameplay_pl.recipe b/recipes/gameplay_pl.recipe index 10f71c1fad..351cce20e8 100644 --- a/recipes/gameplay_pl.recipe +++ b/recipes/gameplay_pl.recipe @@ -27,7 +27,7 @@ class Gameplay_pl(BasicNewsRecipe): return url def preprocess_html(self, soup): - for a in soup('a'): - if a.has_key('href') and '../' in a['href']: # noqa + for a in soup('a', href=True): + if '../' in a['href']: a['href'] = self.index + a['href'][2:] return soup diff --git 
a/recipes/geek_poke.recipe b/recipes/geek_poke.recipe index e4e1092940..22193d8bbf 100644 --- a/recipes/geek_poke.recipe +++ b/recipes/geek_poke.recipe @@ -56,7 +56,7 @@ class AdvancedUserRecipe1307556816(BasicNewsRecipe): extra_css = 'body, h3, p, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em}' def postprocess_html(self, soup, first): - for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')): # noqa + for tag in soup.findAll('img', src=True): iurl = tag['src'] img = Image() img.open(iurl) diff --git a/recipes/gildia_pl.recipe b/recipes/gildia_pl.recipe index 0cc92013ca..4cdf03ad28 100644 --- a/recipes/gildia_pl.recipe +++ b/recipes/gildia_pl.recipe @@ -58,8 +58,8 @@ class Gildia(BasicNewsRecipe): def preprocess_html(self, soup): title = soup.title.renderContents().lower() - for a in soup('a'): - if a.has_key('href') and not a['href'].startswith('http'): # noqa + for a in soup('a', href=True): + if not a['href'].startswith('http'): if '/gry/' in a['href']: a['href'] = 'http://www.gry.gildia.pl' + a['href'] elif u'książk' in title or u'komiks' in title: diff --git a/recipes/gram_pl.recipe b/recipes/gram_pl.recipe index b66ac1e241..77267d93ed 100644 --- a/recipes/gram_pl.recipe +++ b/recipes/gram_pl.recipe @@ -43,8 +43,8 @@ class Gram_pl(BasicNewsRecipe): tag.p.img.extract() tag.p.insert(len(tag.p.contents) - 2, BeautifulSoup('
'} - if not articles.has_key(index): # noqa + if index not in articles: articles[index] = [] articles[index].append(article) @@ -90,7 +90,7 @@ class NrcNextRecipe(BasicNewsRecipe): indices, {u'columnisten': 1, u'koken': 3, u'geld & werk': 2, u'vandaag': 0}) # Apply this sort order to the actual list of feeds and articles answer = [(key, articles[key]) - for key in indices if articles.has_key(key)] # noqa + for key in indices if key in articles] return answer diff --git a/recipes/newsweek_polska.recipe b/recipes/newsweek_polska.recipe index a834b2150e..eda4146c4d 100644 --- a/recipes/newsweek_polska.recipe +++ b/recipes/newsweek_polska.recipe @@ -201,8 +201,7 @@ class Newsweek(BasicNewsRecipe): self.DATE = matches.group(0) # cover - img = main_section.find(lambda tag: tag.name == 'img' and tag.has_key( # noqa - 'alt') and tag.has_key('title')) + img = main_section.find('img', src=True, alt=True, title=True) self.cover_url = img['src'] feeds = [] articles = {} @@ -233,7 +232,7 @@ class Newsweek(BasicNewsRecipe): if article is None: continue - if articles.has_key(section): # noqa + if section in articles: articles[section].append(article) else: articles[section] = [article] diff --git a/recipes/novosti.recipe b/recipes/novosti.recipe index 23383aad75..fee0c4d1b0 100644 --- a/recipes/novosti.recipe +++ b/recipes/novosti.recipe @@ -82,7 +82,6 @@ class Novosti(BasicNewsRecipe): else: str = self.tag_to_string(item) item.replaceWith(str) - for item in soup.findAll('img'): - if not item.has_key('alt'): # noqa - item['alt'] = 'image' + for item in soup.findAll('img', alt=False): + item['alt'] = 'image' return soup diff --git a/recipes/nsfw_corp.recipe b/recipes/nsfw_corp.recipe index 05e832a80d..3681daad63 100644 --- a/recipes/nsfw_corp.recipe +++ b/recipes/nsfw_corp.recipe @@ -55,8 +55,8 @@ class NotSafeForWork(BasicNewsRecipe): def get_feeds(self): self.feeds = [] soup = self.index_to_soup(self.SETTINGS) - for item in soup.findAll('input', attrs={'type': 'text'}): - if 
item.has_key('value') and item['value'].startswith('https://www.nsfwcorp.com/feed/'): # noqa + for item in soup.findAll('input', value=True, attrs={'type': 'text'}): + if item['value'].startswith('https://www.nsfwcorp.com/feed/'): self.feeds.append(item['value']) return self.feeds return self.feeds diff --git a/recipes/nspm.recipe b/recipes/nspm.recipe index f34e823d20..f2e41e9d1d 100644 --- a/recipes/nspm.recipe +++ b/recipes/nspm.recipe @@ -109,7 +109,6 @@ class Nspm(BasicNewsRecipe): else: str = self.tag_to_string(item) item.replaceWith(str) - for item in soup.findAll('img'): - if not item.has_key('alt'): # noqa - item['alt'] = 'image' + for item in soup.findAll('img', alt=False): + item['alt'] = 'image' return soup diff --git a/recipes/ottawa_citizen.recipe b/recipes/ottawa_citizen.recipe index f72b49a78a..a5f16c5bef 100644 --- a/recipes/ottawa_citizen.recipe +++ b/recipes/ottawa_citizen.recipe @@ -286,7 +286,7 @@ class CanWestPaper(BasicNewsRecipe): else: description = self.tag_to_string(dtag, False) print("DESCRIPTION: " + description) - if not articles.has_key(key): # noqa + if key not in articles: articles[key] = [] articles[key].append(dict( title=title, url=url, date='', description=description, author='', content='')) @@ -310,5 +310,5 @@ class CanWestPaper(BasicNewsRecipe): for (k, url) in self.postmedia_index_pages: parse_web_index(k, url) - ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa + ans = [(key, articles[key]) for key in ans if key in articles] return ans diff --git a/recipes/pobjeda.recipe b/recipes/pobjeda.recipe index 9f52941f88..5671bf30dc 100644 --- a/recipes/pobjeda.recipe +++ b/recipes/pobjeda.recipe @@ -86,8 +86,8 @@ class Pobjeda(BasicNewsRecipe): soup = self.index_to_soup(feedurl) for item in soup.findAll('div', attrs={'class': 'vijest'}): description = self.tag_to_string(item.h2) - atag = item.h1.find('a') - if atag and atag.has_key('href'): # noqa + atag = item.h1.find('a', href=True) + if atag is not 
None: url = self.INDEX + '/' + atag['href'] title = self.tag_to_string(atag) date = strftime(self.timefmt) diff --git a/recipes/politika.recipe b/recipes/politika.recipe index 48e5359ec9..f46c664fe3 100644 --- a/recipes/politika.recipe +++ b/recipes/politika.recipe @@ -58,10 +58,9 @@ class Politika(BasicNewsRecipe): del item['style'] for item in soup.findAll('a', attrs={'class': 'category'}): item.name = 'span' - if item.has_key('href'): # noqa - del item['href'] - if item.has_key('title'): # noqa - del item['title'] + item['href'] = item['title'] = '' + del item['href'] + del item['title'] return soup def get_cover_url(self): diff --git a/recipes/polityka.recipe b/recipes/polityka.recipe index 33e73c7c2f..8deb4f98f5 100644 --- a/recipes/polityka.recipe +++ b/recipes/polityka.recipe @@ -51,7 +51,7 @@ class Polityka(BasicNewsRecipe): 'http://archiwum.polityka.pl' + div.a['href'],) section = self.tag_to_string(article_page.find( 'h2', attrs={'class': 'box_nag'})).split('/')[0].lstrip().rstrip() - if not articles.has_key(section): # noqa + if section not in articles: articles[section] = [] articles[section].append({ 'title': self.tag_to_string(div.a), diff --git a/recipes/regina_leader_post.recipe b/recipes/regina_leader_post.recipe index ad56dde0fc..1b928de984 100644 --- a/recipes/regina_leader_post.recipe +++ b/recipes/regina_leader_post.recipe @@ -190,7 +190,7 @@ class CanWestPaper(BasicNewsRecipe): # Find each instance of class="sectiontitle", class="featurecontent" for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}): if divtag['class'].startswith('section_title'): - # div contains section title + # div contains section title if not divtag.h3: continue key = self.tag_to_string(divtag.h3, False) @@ -215,11 +215,11 @@ class CanWestPaper(BasicNewsRecipe): autag = divtag.find('h4') if autag: author = self.tag_to_string(autag, False) - if not articles.has_key(key): # noqa + if key not in articles: articles[key] = [] 
articles[key].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content='')) ans = [(keyl, articles[keyl]) - for keyl in ans if articles.has_key(keyl)] # noqa + for keyl in ans if keyl in articles] return ans diff --git a/recipes/republika.recipe b/recipes/republika.recipe index 836c6d028c..02daafbe71 100644 --- a/recipes/republika.recipe +++ b/recipes/republika.recipe @@ -46,8 +46,8 @@ class Republika(BasicNewsRecipe): for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): item.name = 'div' for attrib in attribs: - if item.has_key(attrib): # noqa - del item[attrib] + item[attrib] = '' + del item[attrib] return soup def parse_index(self): diff --git a/recipes/saskatoon_star_phoenix.recipe b/recipes/saskatoon_star_phoenix.recipe index 1bac2a795b..c78fef6160 100644 --- a/recipes/saskatoon_star_phoenix.recipe +++ b/recipes/saskatoon_star_phoenix.recipe @@ -190,7 +190,7 @@ class CanWestPaper(BasicNewsRecipe): # Find each instance of class="sectiontitle", class="featurecontent" for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}): if divtag['class'].startswith('section_title'): - # div contains section title + # div contains section title if not divtag.h3: continue key = self.tag_to_string(divtag.h3, False) @@ -215,10 +215,10 @@ class CanWestPaper(BasicNewsRecipe): autag = divtag.find('h4') if autag: author = self.tag_to_string(autag, False) - if not articles.has_key(key): # noqa + if key not in articles: articles[key] = [] articles[key].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content='')) - ans = [(k, articles[k]) for k in ans if articles.has_key(k)] # noqa + ans = [(k, articles[k]) for k in ans if k in articles] return ans diff --git a/recipes/scprint.recipe b/recipes/scprint.recipe index 954b2ba011..e4a2edc3b2 100644 --- a/recipes/scprint.recipe +++ b/recipes/scprint.recipe @@ 
-45,11 +45,11 @@ class SCPrintMagazine(BasicNewsRecipe): if arttitlet is not None: mylink = arttitlet.find('a') if mylink is not None: - if mylink.has_key('title'): # noqa + if mylink.get('title'): arttitle = mylink['title'] else: arttitle = 'unknown' - if mylink.has_key('href'): # noqa + if mylink.get('href'): artlink = mylink['href'] artlink = artlink.replace( "/article", "/printarticle") diff --git a/recipes/sueddeutsche_mobil.recipe b/recipes/sueddeutsche_mobil.recipe index ccaf22512e..2b4d8415f9 100644 --- a/recipes/sueddeutsche_mobil.recipe +++ b/recipes/sueddeutsche_mobil.recipe @@ -81,7 +81,7 @@ class SZmobil(BasicNewsRecipe): if itt['href'].startswith('article.php?id='): article_url = itt['href'] article_id = int( - re.search("id=(\d*)&etag=", itt['href']).group(1)) + re.search(r"id=(\d*)&etag=", itt['href']).group(1)) # first check if link is a special article in section # "Meinungsseite" @@ -104,7 +104,7 @@ class SZmobil(BasicNewsRecipe): # just another link ("mehr") to an article continue - if itt.has_key('id'): # noqa + if itt.get('id') is not None: shorttitles[article_id] = article_name else: articles.append( @@ -118,7 +118,7 @@ class SZmobil(BasicNewsRecipe): # pubdate = strftime('') pubdate = strftime('[%a, %d %b]') description = '' - if shorttitles.has_key(article_id): # noqa + if shorttitles.get(article_id) is not None: description = shorttitles[article_id] # we do not want the flag ("Impressum") if "HERAUSGEGEBEN VOM" in description: diff --git a/recipes/techcrunch.recipe b/recipes/techcrunch.recipe index 6d130e787a..cfdda7f233 100644 --- a/recipes/techcrunch.recipe +++ b/recipes/techcrunch.recipe @@ -55,7 +55,6 @@ class TechCrunch(BasicNewsRecipe): else: str = self.tag_to_string(item) item.replaceWith(str) - for item in soup.findAll('img'): - if not item.has_key('alt'): # noqa - item['alt'] = 'image' + for item in soup.findAll('img', alt=False): + item['alt'] = 'image' return soup diff --git a/recipes/the_age.recipe b/recipes/the_age.recipe 
index aad60d29b8..3728fd4d1a 100644 --- a/recipes/the_age.recipe +++ b/recipes/the_age.recipe @@ -45,7 +45,7 @@ class TheAge(BasicNewsRecipe): # Make sure to skip: TheAge - elif section and tag.has_key('href') and len(tag['href'].strip()) > 1: # noqa + elif section and tag.get('href'): url = tag['href'].strip() if url.startswith('/'): url = 'http://www.theage.com.au' + url @@ -105,7 +105,7 @@ class TheAge(BasicNewsRecipe): # Filter out what's left of the text-mode navigation stuff - if re.match('((\s)|(\ \;))*\[[\|\s*]*\]((\s)|(\ \;))*$', contents): + if re.match(r'((\s)|(\ \;))*\[[\|\s*]*\]((\s)|(\ \;))*$', contents): p.extract() continue diff --git a/recipes/thedgesingapore.recipe b/recipes/thedgesingapore.recipe index 57462b3465..9277cc297a 100644 --- a/recipes/thedgesingapore.recipe +++ b/recipes/thedgesingapore.recipe @@ -47,6 +47,6 @@ class Edgesingapore(BasicNewsRecipe): for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): item.name = 'div' for attrib in attribs: - if item.has_key(attrib): # noqa - del item[attrib] + item[attrib] = '' + del item[attrib] return self.adeify_images(soup) diff --git a/recipes/theonion.recipe b/recipes/theonion.recipe index dc5ba8a317..af97c0169a 100644 --- a/recipes/theonion.recipe +++ b/recipes/theonion.recipe @@ -78,12 +78,12 @@ class TheOnion(BasicNewsRecipe): if limg: item.name = 'div' item.attrs = [] - if not limg.has_key('alt'): # noqa + if not limg.get('alt'): limg['alt'] = 'image' else: str = self.tag_to_string(item) item.replaceWith(str) for item in soup.findAll('img'): - if item.has_key('data-src'): # noqa + if item.get('data-src'): item['src'] = item['data-src'] return soup diff --git a/recipes/tomshardware.recipe b/recipes/tomshardware.recipe index 8473fb34a3..27b33d8988 100644 --- a/recipes/tomshardware.recipe +++ b/recipes/tomshardware.recipe @@ -57,8 +57,8 @@ class Tomshardware(BasicNewsRecipe): def cleanup_image_tags(self, soup): for item in 
soup.findAll('img'): for attrib in ['height', 'width', 'border', 'align']: - if item.has_key(attrib): # noqa - del item[attrib] + item[attrib] = '' + del item[attrib] return soup def preprocess_html(self, soup): diff --git a/recipes/twitchfilms.recipe b/recipes/twitchfilms.recipe index 27144c3d14..3279b6c24c 100644 --- a/recipes/twitchfilms.recipe +++ b/recipes/twitchfilms.recipe @@ -45,7 +45,6 @@ class Twitchfilm(BasicNewsRecipe): else: str = self.tag_to_string(item) item.replaceWith(str) - for item in soup.findAll('img'): - if not item.has_key('alt'): # noqa - item['alt'] = 'image' + for item in soup.findAll('img', alt=False): + item['alt'] = 'image' return soup diff --git a/recipes/vancouver_province.recipe b/recipes/vancouver_province.recipe index a97c53750e..588133290d 100644 --- a/recipes/vancouver_province.recipe +++ b/recipes/vancouver_province.recipe @@ -299,7 +299,7 @@ class CanWestPaper(BasicNewsRecipe): else: description = self.tag_to_string(dtag, False) print("DESCRIPTION: " + description) - if not articles.has_key(key): # noqa + if key not in articles: articles[key] = [] articles[key].append(dict( title=title, url=url, date='', description=description, author='', content='')) @@ -323,5 +323,5 @@ class CanWestPaper(BasicNewsRecipe): for (k, url) in self.postmedia_index_pages: parse_web_index(k, url) - ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa + ans = [(key, articles[key]) for key in ans if key in articles] return ans diff --git a/recipes/vancouver_sun.recipe b/recipes/vancouver_sun.recipe index 899c8fbca9..e0c9e4fdce 100644 --- a/recipes/vancouver_sun.recipe +++ b/recipes/vancouver_sun.recipe @@ -287,7 +287,7 @@ class CanWestPaper(BasicNewsRecipe): else: description = self.tag_to_string(dtag, False) print("DESCRIPTION: " + description) - if not articles.has_key(key): # noqa + if key not in articles: articles[key] = [] articles[key].append(dict( title=title, url=url, date='', description=description, author='', 
content='')) @@ -311,5 +311,5 @@ class CanWestPaper(BasicNewsRecipe): for (k, url) in self.postmedia_index_pages: parse_web_index(k, url) - ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa + ans = [(key, articles[key]) for key in ans if key in articles] # noqa return ans diff --git a/recipes/variety.recipe b/recipes/variety.recipe index f54b732d13..2aa61b3c3f 100644 --- a/recipes/variety.recipe +++ b/recipes/variety.recipe @@ -62,7 +62,6 @@ class Variety(BasicNewsRecipe): else: str = self.tag_to_string(item) item.replaceWith(str) - for item in soup.findAll('img'): - if not item.has_key('alt'): # noqa - item['alt'] = 'image' + for item in soup.findAll('img', alt=False): + item['alt'] = 'image' return soup diff --git a/recipes/vedomosti.recipe b/recipes/vedomosti.recipe index 8133588a1e..f3e67fef11 100644 --- a/recipes/vedomosti.recipe +++ b/recipes/vedomosti.recipe @@ -62,16 +62,22 @@ class VedomostiRecipe(BasicNewsRecipe): if not feedData: raise NotImplementedError self.log("parse_index: Feed loaded successfully.") - if feedData.feed.has_key('title'): # noqa - self.title = feedData.feed.title - self.log("parse_index: Title updated to: ", self.title) - if feedData.feed.has_key('description'): # noqa - self.description = feedData.feed.description - self.log("parse_index: Description updated to: ", - self.description) + try: + if feedData.feed.title: + self.title = feedData.feed.title + self.log("parse_index: Title updated to: ", self.title) + except Exception: + pass + try: + if feedData.feed.description: + self.description = feedData.feed.description + self.log("parse_index: Description updated to: ", + self.description) + except Exception: + pass def get_virtual_feed_articles(feed): - if feeds.has_key(feed): # noqa + if feed in feeds: return feeds[feed][1] self.log("Adding new feed: ", feed) articles = [] @@ -88,7 +94,7 @@ class VedomostiRecipe(BasicNewsRecipe): continue article = {'title': title, 'url': link, 'description': item.get( 
'description', ''), 'date': item.get('date', ''), 'content': ''} - if not item.has_key('tags'): # noqa + if not item.get('tags'): # noqa get_virtual_feed_articles('_default').append(article) continue for tag in item.tags: @@ -105,7 +111,7 @@ class VedomostiRecipe(BasicNewsRecipe): # Select sorted feeds first of all result = [] for feedName in self.sortOrder: - if (not feeds.has_key(feedName)): # noqa + if (not feeds.get(feedName)): continue result.append(feeds[feedName]) del feeds[feedName] @@ -142,9 +148,9 @@ class VedomostiRecipe(BasicNewsRecipe): imgDiv = Tag(soup, 'div') imgDiv['class'] = 'article_img' - if img.has_key('width'): # noqa + if img.get('width'): del(img['width']) - if img.has_key('height'): # noqa + if img.get('height'): del(img['height']) # find description @@ -180,11 +186,9 @@ class VedomostiRecipe(BasicNewsRecipe): contents.insert(len(contents.contents), authorsP) # Fix urls that use relative path - urls = contents.findAll('a') + urls = contents.findAll('a', href=True) if urls: for url in urls: - if not url.has_key('href'): # noqa - continue if '/' == url['href'][0]: url['href'] = self.base_url + url['href'] diff --git a/recipes/wapo_cartoons.recipe b/recipes/wapo_cartoons.recipe index 4fe51a5903..d4097910ac 100644 --- a/recipes/wapo_cartoons.recipe +++ b/recipes/wapo_cartoons.recipe @@ -94,8 +94,8 @@ class WaPoCartoonsRecipe(BasicNewsRecipe): img = soup.find('img', attrs={'class': 'pic_big'}) if img: td = img.parent - if td.has_key('style'): # noqa - del td['style'] + td['style'] = '' + del td['style'] td.name = 'div' td['id'] = 'comic_full' freshSoup.body.append(td) @@ -134,11 +134,8 @@ class WaPoCartoonsRecipe(BasicNewsRecipe): 'June': '06', 'July': '07', 'August': '08', 'September': '09', 'October': '10', 'November': '11', 'December': '12'} - opts = select.findAll('option') + opts = select.findAll('option', selected=False) for i in range(1, len(opts)): - if opts[i].has_key('selected'): # noqa - continue - dateString = 
self.tag_to_string(opts[i]) rest, sep, year = dateString.rpartition(', ') parts = rest.split(' ') diff --git a/recipes/wenxuecity-znjy.recipe b/recipes/wenxuecity-znjy.recipe index 97a0a2f98f..031175812e 100644 --- a/recipes/wenxuecity-znjy.recipe +++ b/recipes/wenxuecity-znjy.recipe @@ -42,11 +42,11 @@ class TheCND(BasicNewsRecipe): url = 'http://bbs.wenxuecity.com' + url title = self.tag_to_string(a) self.log('\tFound article: ', title, ' at:', url) - dateReg = re.search('(\d\d?)/(\d\d?)/(\d\d)', + dateReg = re.search(r'(\d\d?)/(\d\d?)/(\d\d)', self.tag_to_string(a.parent)) date = '%(y)s/%(m)02d/%(d)02d' % {'y': dateReg.group(3), 'm': int(dateReg.group(1)), 'd': int(dateReg.group(2))} - if not articles.has_key(date): # noqa + if date not in articles: # noqa articles[date] = [] articles[date].append( {'title': title, 'url': url, 'description': '', 'date': ''}) diff --git a/recipes/windows_star.recipe b/recipes/windows_star.recipe index ec97b1223e..d66e4cd9c1 100644 --- a/recipes/windows_star.recipe +++ b/recipes/windows_star.recipe @@ -96,10 +96,10 @@ class CanWestPaper(BasicNewsRecipe): autag = divtag.find('h4') if autag: author = self.tag_to_string(autag, False) - if not articles.has_key(key): # noqa + if key not in articles: articles[key] = [] articles[key].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content='')) - ans = [(keyl, articles[key]) for keyl in ans if articles.has_key(keyl)] # noqa + ans = [(keyl, articles[key]) for keyl in ans if keyl in articles] return ans diff --git a/recipes/windsor_star.recipe b/recipes/windsor_star.recipe index 5c20cecb8f..b1837120e5 100644 --- a/recipes/windsor_star.recipe +++ b/recipes/windsor_star.recipe @@ -189,9 +189,9 @@ class CanWestPaper(BasicNewsRecipe): # Find each instance of class="sectiontitle", class="featurecontent" for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}): - # self.log(" div class = %s" % divtag['class']) + # self.log(" 
div class = %s" % divtag['class']) if divtag['class'].startswith('section_title'): - # div contains section title + # div contains section title if not divtag.h3: continue key = self.tag_to_string(divtag.h3, False) @@ -221,11 +221,11 @@ class CanWestPaper(BasicNewsRecipe): if autag: author = self.tag_to_string(autag, False) # self.log("author %s" % author) - if not articles.has_key(key): # noqa + if key not in articles: articles[key] = [] articles[key].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content='')) ans = [(keyl, articles[keyl]) - for keyl in ans if articles.has_key(keyl)] # noqa + for keyl in ans if keyl in articles] return ans diff --git a/recipes/zaobao.recipe b/recipes/zaobao.recipe index 48e24a53c0..4d674c4aaf 100644 --- a/recipes/zaobao.recipe +++ b/recipes/zaobao.recipe @@ -83,11 +83,10 @@ class ZAOBAO(BasicNewsRecipe): ] def preprocess_html(self, soup): - for tag in soup.findAll(name='a'): - if tag.has_key('href'): # noqa - tag_url = tag['href'] - if tag_url.find('http://') != -1 and tag_url.find('zaobao.com') == -1: - del tag['href'] + for tag in soup.findAll(name='a', href=True): + tag_url = tag['href'] + if tag_url.find('http://') != -1 and tag_url.find('zaobao.com') == -1: + del tag['href'] return soup def postprocess_html(self, soup, first): @@ -107,8 +106,8 @@ class ZAOBAO(BasicNewsRecipe): for i, item in enumerate(soup.findAll('li')): if i >= self.MAX_ITEMS_IN_INDEX: break - a = item.find('a') - if a and a.has_key('href'): # noqa + a = item.find('a', href=True) + if a is not None: a_url = a['href'] a_title = self.tag_to_string(a) date = ''