Get rid of has_key from all recipes

Kovid Goyal 2019-03-22 23:47:21 +05:30
parent 3c86c9be6f
commit 5755625d1e
GPG Key ID: 06BC317B515ACE7C
80 changed files with 196 additions and 221 deletions
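The whole commit is one mechanical migration: dict.has_key() was removed in Python 3, and the has_key() method on BeautifulSoup tags is likewise Python 2 only, so every call site moves to the in operator, Tag.get(), or an attribute filter passed to findAll. A minimal sketch of the dictionary side (the data is illustrative and comes from no particular recipe):

# has_key() no longer exists in Python 3; `in` is the replacement and
# already worked in Python 2, so the rewrite is backwards compatible.
articles = {'World': [], 'Sports': []}

# Python 2 only:   articles.has_key('World')
# Python 2 and 3:  'World' in articles
if 'World' in articles:
    articles['World'].append({'title': 'example', 'url': 'http://example.com'})

# Negated form, as used throughout the recipes below:
if 'Opinion' not in articles:
    articles['Opinion'] = []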

View File

@ -217,7 +217,7 @@ A reasonably complex real life example that exposes more of the :term:`API` of `
description = self.tag_to_string(summary, use_alt=False) description = self.tag_to_string(summary, use_alt=False)
feed = key if key is not None else 'Uncategorized' feed = key if key is not None else 'Uncategorized'
if not articles.has_key(feed): if feed not in articles:
articles[feed] = [] articles[feed] = []
if not 'podcasts' in url: if not 'podcasts' in url:
articles[feed].append( articles[feed].append(
@ -225,7 +225,7 @@ A reasonably complex real life example that exposes more of the :term:`API` of `
description=description, description=description,
content='')) content=''))
ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2}) ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] ans = [(key, articles[key]) for key in ans if key in articles]
return ans return ans
def preprocess_html(self, soup): def preprocess_html(self, soup):
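The check-then-create pair in this hunk can also be collapsed with dict.setdefault; a hypothetical standalone sketch of the same grouping logic, with made-up feed names:

articles = {}
feed = 'Uncategorized'
article = {'title': 't', 'url': 'u', 'description': '', 'content': ''}

if feed not in articles:      # replaces articles.has_key(feed)
    articles[feed] = []
articles[feed].append(article)

# Equivalent form without the explicit membership test
# (shown a second time here only for comparison):
articles.setdefault(feed, []).append(article)

# The final ordering keeps only sections that actually collected articles:
ans = ['The Front Page', 'Obituaries', 'Uncategorized']
ans = [(key, articles[key]) for key in ans if key in articles]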

View File

@ -60,7 +60,6 @@ class t20Minutos(BasicNewsRecipe):
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)
for item in soup.findAll('img'): for item in soup.findAll('img', alt=False):
if not item.has_key('alt'): # noqa item['alt'] = 'image'
item['alt'] = 'image'
return soup return soup
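This hunk shows the second half of the migration: findAll accepts attribute filters directly, so the has_key guard disappears rather than being rewritten. A runnable sketch using bs4 (the recipes use calibre's bundled BeautifulSoup, whose findAll handles these filters the same way):

from bs4 import BeautifulSoup

soup = BeautifulSoup(
    '<img src="a.png"><img src="b.png" alt="logo"><a href="/x">x</a><a name="y">y</a>',
    'html.parser')

# alt=False matches only tags *without* an alt attribute, so the old
# `if not item.has_key('alt')` guard is folded into the query itself.
for img in soup.findAll('img', alt=False):
    img['alt'] = 'image'

# href=True matches only tags *with* an href attribute; a['href'] is then safe.
for a in soup.findAll('a', href=True):
    print(a['href'])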

View File

@ -112,7 +112,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
for div in soup.findAll('div', attrs={'class': re.compile(self.author_reg_exp, re.IGNORECASE)}): for div in soup.findAll('div', attrs={'class': re.compile(self.author_reg_exp, re.IGNORECASE)}):
div.extract() div.extract()
for auth in div.findAll('a'): for auth in div.findAll('a'):
if (auth.has_key('class') and auth['class'] == 'cm-source-image'): # noqa if auth.get('class') == 'cm-source-image':
continue continue
names = names + comma + auth.contents[0] names = names + comma + auth.contents[0]
comma = ', ' comma = ', '

View File

@ -25,7 +25,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
def generic_parse(self, soup): def generic_parse(self, soup):
articles = [] articles = []
# soup.findAll('li', 'hentry'): # soup.findAll('li', 'hentry'):
for entry in soup.findAll(lambda tag: tag.name == 'li' and tag.has_key('class') and tag['class'].find('hentry') != -1): # noqa for entry in soup.findAll('li', attrs={'class': lambda x: x and 'hentry' in x}):
article_url = entry.a['href'] + '?print=yes' article_url = entry.a['href'] + '?print=yes'
article_title = entry.find('h3', 'entry-title') article_title = entry.find('h3', 'entry-title')
article_title = self.tag_to_string(article_title) article_title = self.tag_to_string(article_title)
@ -48,7 +48,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
def plumaje_parse(self, soup): def plumaje_parse(self, soup):
articles = [] articles = []
blogs_soup = soup.find(lambda tag: tag.name == 'ul' and tag.has_key('class') and tag['class'].find('bloglist-fecha') != -1) # noqa blogs_soup = soup.find('ul', attrs={'class': lambda x: x and 'bloglist-fecha' in x})
for entry in blogs_soup.findAll('li'): for entry in blogs_soup.findAll('li'):
article_title = entry.p article_title = entry.p
article_url = article_title.a['href'] + '?print=yes' article_url = article_title.a['href'] + '?print=yes'
@ -69,7 +69,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
def boca_parse(self, soup): def boca_parse(self, soup):
articles = [] articles = []
# soup.findAll('li', 'hentry'): # soup.findAll('li', 'hentry'):
for entry in soup.findAll(lambda tag: tag.name == 'div' and tag.has_key('class') and tag['class'].find('hentry') != -1): # noqa for entry in soup.findAll('div', attrs={'class': lambda x: x and 'hentry' in x}):
article_title = entry.find('h2', 'entry-title') article_title = entry.find('h2', 'entry-title')
article_url = article_title.a['href'] + '?print=yes' article_url = article_title.a['href'] + '?print=yes'
article_title = self.tag_to_string(article_title) article_title = self.tag_to_string(article_title)
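The lambdas in this file, which had to test has_key('class') before touching tag['class'], become callable attribute filters. The callable must still tolerate a missing attribute, since it is also invoked for tags with no class at all, hence the `x and` guard. A small sketch:

from bs4 import BeautifulSoup

soup = BeautifulSoup(
    '<li class="hentry odd">a</li><li class="other">b</li><li>c</li>',
    'html.parser')

# The filter receives the attribute value (None when the tag has no class);
# `x and ...` short-circuits before the membership test can fail on None.
for entry in soup.findAll('li', attrs={'class': lambda x: x and 'hentry' in x}):
    print(entry.get_text())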

View File

@ -62,8 +62,8 @@ class BuenosAiresHerald(BasicNewsRecipe):
soup = self.index_to_soup(feedurl) soup = self.index_to_soup(feedurl)
for item in soup.findAll('div', attrs={'class': 'nota_texto_seccion'}): for item in soup.findAll('div', attrs={'class': 'nota_texto_seccion'}):
description = self.tag_to_string(item.h2) description = self.tag_to_string(item.h2)
atag = item.h2.find('a') atag = item.h2.find('a', href=True)
if atag and atag.has_key('href'): # noqa if atag is not None:
url = self.INDEX + atag['href'] url = self.INDEX + atag['href']
title = description title = description
date = strftime(self.timefmt) date = strftime(self.timefmt)

View File

@ -50,8 +50,8 @@ class BenchmarkPl(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
self.append_page(soup, soup.body) self.append_page(soup, soup.body)
for a in soup('a'): for a in soup.findAll('a', href=True):
if a.has_key('href') and not a['href'].startswith('http'): # noqa if not a['href'].startswith('http'):
a['href'] = self.INDEX + a['href'] a['href'] = self.INDEX + a['href']
for r in soup.findAll(attrs={'class': ['comments', 'body']}): for r in soup.findAll(attrs={'class': ['comments', 'body']}):
r.extract() r.extract()

View File

@ -55,7 +55,6 @@ class BigHollywood(BasicNewsRecipe):
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)
for item in soup.findAll('img'): for item in soup.findAll('img', alt=False):
if not item.has_key('alt'): # noqa item['alt'] = 'image'
item['alt'] = 'image'
return soup return soup

View File

@ -59,7 +59,6 @@ class Business_insider(BasicNewsRecipe):
if item.string is not None: if item.string is not None:
tstr = item.string tstr = item.string
item.replaceWith(tstr) item.replaceWith(tstr)
for item in soup.findAll('img'): for item in soup.findAll('img', alt=False):
if not item.has_key('alt'): # noqa item['alt'] = 'image'
item['alt'] = 'image'
return soup return soup

View File

@ -286,7 +286,7 @@ class CanWestPaper(BasicNewsRecipe):
else: else:
description = self.tag_to_string(dtag, False) description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description) print("DESCRIPTION: " + description)
if not articles.has_key(key): # noqa if key not in articles:
articles[key] = [] articles[key] = []
articles[key].append(dict( articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content='')) title=title, url=url, date='', description=description, author='', content=''))
@ -310,5 +310,5 @@ class CanWestPaper(BasicNewsRecipe):
for (k, url) in self.postmedia_index_pages: for (k, url) in self.postmedia_index_pages:
parse_web_index(k, url) parse_web_index(k, url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa ans = [(key, articles[key]) for key in ans if key in articles]
return ans return ans

View File

@ -22,7 +22,7 @@ class CD_Action(BasicNewsRecipe):
return getattr(self, 'cover_url', self.cover_url) return getattr(self, 'cover_url', self.cover_url)
def preprocess_html(self, soup): def preprocess_html(self, soup):
for a in soup('a'): for a in soup.findAll('a', href=True):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: # noqa if 'http://' not in a['href'] and 'https://' not in a['href']:
a['href'] = self.index + a['href'] a['href'] = self.index + a['href']
return soup return soup

View File

@ -101,10 +101,7 @@ class CSMonitor(BasicNewsRecipe):
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)
for item in soup.findAll('img'): for item in soup.findAll('img', src=True):
if 'scorecardresearch' in item['src']: if 'scorecardresearch' in item['src']:
item.extract() item.extract()
else:
if not item.has_key('alt'): # noqa
item['alt'] = 'image'
return soup return soup

View File

@ -36,10 +36,9 @@ class Cinebel(BasicNewsRecipe):
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
for alink in soup.findAll('a'): for alink in soup.findAll('a', href=True):
if alink.has_key('href'): # noqa tstr = "Site officiel: " + alink['href']
tstr = "Site officiel: " + alink['href'] alink.replaceWith(tstr)
alink.replaceWith(tstr)
return soup return soup
def get_cover_url(self): def get_cover_url(self):

View File

@ -131,12 +131,12 @@ class CIO_Magazine(BasicNewsRecipe):
# This is copied from the NY Times # This is copied from the NY Times
feed = key if key is not None else 'Uncategorized' feed = key if key is not None else 'Uncategorized'
if not articles.has_key(feed): # noqa if feed not in articles:
articles[feed] = [] articles[feed] = []
if 'podcasts' not in url: if 'podcasts' not in url:
articles[feed].append( articles[feed].append(
dict(title=title, url=url, date=pubdate, dict(title=title, url=url, date=pubdate,
description=description, description=description,
content='')) content=''))
feeds = [(k, articles[k]) for k in feeds if articles.has_key(k)] # noqa feeds = [(k, articles[k]) for k in feeds if k in articles]
return feeds return feeds

View File

@ -54,7 +54,7 @@ class TheCND(BasicNewsRecipe):
if re.search('cm', date): if re.search('cm', date):
continue continue
if (date is not None) and len(date) > 2: if (date is not None) and len(date) > 2:
if not articles.has_key(date): # noqa if date not in articles:
articles[date] = [] articles[date] = []
articles[date].append( articles[date].append(
{'title': title, 'url': url, 'description': '', 'date': ''}) {'title': title, 'url': url, 'description': '', 'date': ''})

View File

@ -54,7 +54,7 @@ class TheCND(BasicNewsRecipe):
continue continue
self.log('\tFound article: ', title, 'at', url, '@', date) self.log('\tFound article: ', title, 'at', url, '@', date)
if (date is not None) and len(date) > 2: if (date is not None) and len(date) > 2:
if not articles.has_key(date): # noqa if date not in articles:
articles[date] = [] articles[date] = []
articles[date].append( articles[date].append(
{'title': title, 'url': url, 'description': '', 'date': ''}) {'title': title, 'url': url, 'description': '', 'date': ''})

View File

@ -53,8 +53,8 @@ class General(BasicNewsRecipe):
for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
item.name = 'div' item.name = 'div'
for attrib in attribs: for attrib in attribs:
if item.has_key(attrib): # noqa item[attrib] = ''
del item[attrib] del item[attrib]
return soup return soup
def get_cover_url(self): def get_cover_url(self):
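The assign-then-delete pair in this hunk looks odd but is deliberate: deleting an attribute a BeautifulSoup tag does not have raises KeyError, so writing a dummy value first guarantees the del always succeeds without a has_key guard. A sketch of the idiom:

from bs4 import BeautifulSoup

soup = BeautifulSoup('<table width="100"><tr valign="top"><td>x</td></tr></table>',
                     'html.parser')
attribs = ['align', 'width', 'valign', 'border', 'cellspacing', 'cellpadding']

for item in soup.findAll(['table', 'tr', 'td']):
    item.name = 'div'
    for attrib in attribs:
        item[attrib] = ''   # make sure the key exists...
        del item[attrib]    # ...so del can never raise KeyError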

View File

@ -46,7 +46,6 @@ class CubaDebate(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
for item in soup.findAll('img'): for item in soup.findAll('img', alt=False):
if not item.has_key('alt'): # noqa item['alt'] = 'image'
item['alt'] = 'image'
return soup return soup

View File

@ -57,7 +57,6 @@ class Pagina12(BasicNewsRecipe):
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)
for item in soup.findAll('img'): for item in soup.findAll('img', alt=False):
if not item.has_key('alt'): # noqa item['alt'] = 'image'
item['alt'] = 'image'
return soup return soup

View File

@ -64,7 +64,7 @@ class DeGentenaarOnline(BasicNewsRecipe):
del item['style'] del item['style']
for item in soup.findAll('span'): for item in soup.findAll('span'):
item.name = 'div' item.name = 'div'
if item.has_key('id') and item['id'] == 'lblArticleTitle': # noqa if item.get('id') == 'lblArticleTitle':
item.name = 'h3' item.name = 'h3'
soup.html['lang'] = self.lang soup.html['lang'] = self.lang

View File

@ -65,8 +65,8 @@ class DeutscheWelle_bs(BasicNewsRecipe):
if limg: if limg:
item.name = 'div' item.name = 'div'
del item['href'] del item['href']
if item.has_key('target'): # noqa item['target'] = ''
del item['target'] del item['target']
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)

View File

@ -63,8 +63,8 @@ class DeutscheWelle_hr(BasicNewsRecipe):
if limg: if limg:
item.name = 'div' item.name = 'div'
del item['href'] del item['href']
if item.has_key('target'): # noqa item['target'] = ''
del item['target'] del item['target']
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)

View File

@ -54,8 +54,8 @@ class DeutscheWelle_pt(BasicNewsRecipe):
if limg: if limg:
item.name = 'div' item.name = 'div'
del item['href'] del item['href']
if item.has_key('target'): # noqa item['target'] = ''
del item['target'] del item['target']
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)

View File

@ -68,8 +68,8 @@ class DeutscheWelle_sr(BasicNewsRecipe):
if limg: if limg:
item.name = 'div' item.name = 'div'
del item['href'] del item['href']
if item.has_key('target'): # noqa item['target'] = ''
del item['target'] del item['target']
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)

View File

@ -55,8 +55,8 @@ class DnevnikCro(BasicNewsRecipe):
for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
item.name = 'div' item.name = 'div'
for attrib in attribs: for attrib in attribs:
if item.has_key(attrib): # noqa item[attrib] = ''
del item[attrib] del item[attrib]
mlang = Tag(soup, 'meta', [ mlang = Tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])

View File

@ -37,7 +37,6 @@ class DobaNevinosti(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
for item in soup.findAll('img'): for item in soup.findAll('img', alt=False):
if not item.has_key('alt'): # noqa item['alt'] = 'image'
item['alt'] = 'image'
return soup return soup

View File

@ -28,8 +28,8 @@ class Dobreprogramy_pl(BasicNewsRecipe):
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')] ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
def preprocess_html(self, soup): def preprocess_html(self, soup):
for a in soup('a'): for a in soup('a', href=True):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: # noqa if 'http://' not in a['href'] and 'https://' not in a['href']:
a['href'] = self.index + a['href'] a['href'] = self.index + a['href']
for r in soup.findAll('iframe'): for r in soup.findAll('iframe'):
r.parent.extract() r.parent.extract()

View File

@ -81,8 +81,8 @@ class Dzieje(BasicNewsRecipe):
return feeds return feeds
def preprocess_html(self, soup): def preprocess_html(self, soup):
for a in soup('a'): for a in soup('a', href=True):
if a.has_key('href') and not a['href'].startswith('http'): # noqa if not a['href'].startswith('http'):
a['href'] = self.index + a['href'] a['href'] = self.index + a['href']
self.append_page(soup, soup.body) self.append_page(soup, soup.body)
return soup return soup

View File

@ -286,7 +286,7 @@ class CanWestPaper(BasicNewsRecipe):
else: else:
description = self.tag_to_string(dtag, False) description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description) print("DESCRIPTION: " + description)
if not articles.has_key(key): # noqa if key not in articles:
articles[key] = [] articles[key] = []
articles[key].append(dict( articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content='')) title=title, url=url, date='', description=description, author='', content=''))
@ -310,5 +310,5 @@ class CanWestPaper(BasicNewsRecipe):
for (k, url) in self.postmedia_index_pages: for (k, url) in self.postmedia_index_pages:
parse_web_index(k, url) parse_web_index(k, url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa ans = [(key, articles[key]) for key in ans if key in articles]
return ans return ans

View File

@ -113,11 +113,11 @@ class ElDiplo_Recipe(BasicNewsRecipe):
if aut: if aut:
auth = self.tag_to_string(aut, use_alt=False).strip() auth = self.tag_to_string(aut, use_alt=False).strip()
if not articles.has_key(section): # noqa if section not in articles:
articles[section] = [] articles[section] = []
articles[section].append(dict( articles[section].append(dict(
title=title, author=auth, url=url, date=None, description=description, content='')) title=title, author=auth, url=url, date=None, description=description, content=''))
ans = [(s, articles[s]) for s in ans if articles.has_key(s)] # noqa ans = [(s, articles[s]) for s in ans if s in articles]
return ans return ans

View File

@ -53,7 +53,6 @@ class ElClubDelEbook(BasicNewsRecipe):
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)
for item in soup.findAll('img'): for item in soup.findAll('img', alt=False):
if not item.has_key('alt'): # noqa item['alt'] = 'image'
item['alt'] = 'image'
return soup return soup

View File

@ -35,17 +35,16 @@ class ElPaisSemanal(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
articles = [] articles = []
soup = self.index_to_soup(self.index) soup = self.index_to_soup(self.index)
for item in soup.findAll('a', attrs={'class': ['g19i003', 'g17r003', 'g17i003']}): for item in soup.findAll('a', attrs={'class': ['g19i003', 'g17r003', 'g17i003']}, href=True):
description = '' description = ''
title_prefix = '' title_prefix = ''
feed_link = item feed_link = item
if item.has_key('href'): # noqa url = 'http://www.elpais.com' + item['href'].rpartition('/')[0]
url = 'http://www.elpais.com' + item['href'].rpartition('/')[0] title = title_prefix + self.tag_to_string(feed_link)
title = title_prefix + self.tag_to_string(feed_link) date = strftime(self.timefmt)
date = strftime(self.timefmt) articles.append({
articles.append({ 'title': title, 'date': date, 'url': url, 'description': description
'title': title, 'date': date, 'url': url, 'description': description })
})
return [(soup.head.title.string, articles)] return [(soup.head.title.string, articles)]
def print_version(self, url): def print_version(self, url):

View File

@ -31,7 +31,7 @@ class ElUniversalImpresaRecipe(BasicNewsRecipe):
table = soup.find('table', attrs={'width': '500'}) table = soup.find('table', attrs={'width': '500'})
articles = [] articles = []
for td in table.findAll(lambda tag: tag.name == 'td' and tag.has_key('class') and tag['class'] == 'arnegro12'): # noqa for td in table.findAll('td', attrs={'class': 'arnegro12'}):
a = td.a a = td.a
a.extract() a.extract()
title = self.tag_to_string(a) title = self.tag_to_string(a)
@ -79,8 +79,8 @@ class ElUniversalImpresaRecipe(BasicNewsRecipe):
tag = soup.find('font', attrs={'color': '#0F046A'}) tag = soup.find('font', attrs={'color': '#0F046A'})
if tag: if tag:
for attr in ['color', 'face', 'helvetica,', 'sans-serif', 'size']: for attr in ['color', 'face', 'helvetica,', 'sans-serif', 'size']:
if tag.has_key(attr): # noqa tag[attr] = ''
del tag[attr] del tag[attr]
tag.name = 'h1' tag.name = 'h1'
return soup return soup

View File

@ -62,8 +62,8 @@ class ESPN(BasicNewsRecipe):
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
for div in soup.findAll('div'): for div in soup.findAll('div', style=True):
if div.has_key('style') and 'px' in div['style']: # noqa if 'px' in div['style']:
div['style'] = '' div['style'] = ''
return soup return soup

View File

@ -88,7 +88,7 @@ class Estadao(BasicNewsRecipe):
def postprocess_html(self, soup, first): def postprocess_html(self, soup, first):
# process all the images. assumes that the new html has the correct # process all the images. assumes that the new html has the correct
# path # path
for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')): # noqa for tag in soup.findAll('img', src=True):
iurl = tag['src'] iurl = tag['src']
img = Image() img = Image()
img.open(iurl) img.open(iurl)

View File

@ -57,7 +57,6 @@ class FinancialSense(BasicNewsRecipe):
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)
for item in soup.findAll('img'): for item in soup.findAll('img', alt=False):
if not item.has_key('alt'): # noqa item['alt'] = 'image'
item['alt'] = 'image'
return soup return soup

View File

@ -84,8 +84,8 @@ class General(BasicNewsRecipe):
for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
item.name = 'div' item.name = 'div'
for attrib in attribs: for attrib in attribs:
if item.has_key(attrib): # noqa item[attrib] = ''
del item[attrib] del item[attrib]
return soup return soup
def get_cover_url(self): def get_cover_url(self):

View File

@ -27,7 +27,7 @@ class Gameplay_pl(BasicNewsRecipe):
return url return url
def preprocess_html(self, soup): def preprocess_html(self, soup):
for a in soup('a'): for a in soup('a', href=True):
if a.has_key('href') and '../' in a['href']: # noqa if '../' in a['href']:
a['href'] = self.index + a['href'][2:] a['href'] = self.index + a['href'][2:]
return soup return soup

View File

@ -56,7 +56,7 @@ class AdvancedUserRecipe1307556816(BasicNewsRecipe):
extra_css = 'body, h3, p, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em}' extra_css = 'body, h3, p, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em}'
def postprocess_html(self, soup, first): def postprocess_html(self, soup, first):
for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')): # noqa for tag in soup.findAll('img', src=True):
iurl = tag['src'] iurl = tag['src']
img = Image() img = Image()
img.open(iurl) img.open(iurl)

View File

@ -58,8 +58,8 @@ class Gildia(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
title = soup.title.renderContents().lower() title = soup.title.renderContents().lower()
for a in soup('a'): for a in soup('a', href=True):
if a.has_key('href') and not a['href'].startswith('http'): # noqa if not a['href'].startswith('http'):
if '/gry/' in a['href']: if '/gry/' in a['href']:
a['href'] = 'http://www.gry.gildia.pl' + a['href'] a['href'] = 'http://www.gry.gildia.pl' + a['href']
elif u'książk' in title or u'komiks' in title: elif u'książk' in title or u'komiks' in title:

View File

@ -43,8 +43,8 @@ class Gram_pl(BasicNewsRecipe):
tag.p.img.extract() tag.p.img.extract()
tag.p.insert(len(tag.p.contents) - 2, tag.p.insert(len(tag.p.contents) - 2,
BeautifulSoup('<h2>Ocena: {0}</h2>'.format(rate)).h2) BeautifulSoup('<h2>Ocena: {0}</h2>'.format(rate)).h2)
for a in soup('a'): for a in soup.findAll('a', href=True):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: # noqa if 'http://' not in a['href'] and 'https://' not in a['href']:
a['href'] = self.index + a['href'] a['href'] = self.index + a['href']
tag = soup.find(name='span', attrs={'class': 'platforma'}) tag = soup.find(name='span', attrs={'class': 'platforma'})
if tag: if tag:

View File

@ -47,7 +47,7 @@ class in4(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
self.append_page(soup, soup.body) self.append_page(soup, soup.body)
for a in soup('a'): for a in soup.findAll('a', href=True):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: # noqa if 'http://' not in a['href'] and 'https://' not in a['href']:
a['href'] = self.index + a['href'] a['href'] = self.index + a['href']
return soup return soup

View File

@ -21,7 +21,7 @@ class INFRA(BasicNewsRecipe):
def preprocess_html(self, soup): def preprocess_html(self, soup):
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
for a in soup('a'): for a in soup.findAll('a', href=True):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: # noqa if 'http://' not in a['href'] and 'https://' not in a['href']:
a['href'] = self.index + a['href'] a['href'] = self.index + a['href']
return soup return soup

View File

@ -65,8 +65,8 @@ class Jutarnji(BasicNewsRecipe):
for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
item.name = 'div' item.name = 'div'
for attrib in attribs: for attrib in attribs:
if item.has_key(attrib): # noqa item[attrib] = ''
del item[attrib] del item[attrib]
mlang = Tag(soup, 'meta', [ mlang = Tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)]) ("http-equiv", "Content-Language"), ("content", self.lang)])

View File

@ -47,7 +47,7 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
soup = self.index_to_soup(feedurl) soup = self.index_to_soup(feedurl)
for item in soup.findAll('table', attrs={'class': ['item', 'item new']}): for item in soup.findAll('table', attrs={'class': ['item', 'item new']}):
atag = item.a atag = item.a
if atag and atag.has_key('href'): # noqa if atag and atag.get('href') is not None:
url = atag['href'] url = atag['href']
articles.append({ articles.append({
'url': url 'url': url

View File

@ -61,17 +61,18 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe):
cnt = soup.find('div', attrs={'class': 'som_num'}) cnt = soup.find('div', attrs={'class': 'som_num'})
for item in cnt.findAll('li'): for item in cnt.findAll('li'):
description = '' description = ''
feed_link = item.find('a') feed_link = item.find('a', href=True)
if feed_link is None:
continue
desc = item.find('div', attrs={'class': 'chapo'}) desc = item.find('div', attrs={'class': 'chapo'})
if desc: if desc:
description = desc.string description = desc.string
if feed_link and feed_link.has_key('href'): # noqa url = self.PREFIX + feed_link['href'].partition('/../')[2]
url = self.PREFIX + feed_link['href'].partition('/../')[2] title = self.tag_to_string(feed_link)
title = self.tag_to_string(feed_link) date = strftime(self.timefmt)
date = strftime(self.timefmt) articles.append({
articles.append({ 'title': title, 'date': date, 'url': url, 'description': description
'title': title, 'date': date, 'url': url, 'description': description })
})
return [(self.title, articles)] return [(self.title, articles)]
def get_cover_url(self): def get_cover_url(self):

View File

@ -60,14 +60,9 @@ class LentaRURecipe(BasicNewsRecipe):
if not feedData: if not feedData:
raise NotImplementedError raise NotImplementedError
self.log("parse_index: Feed loaded successfully.") self.log("parse_index: Feed loaded successfully.")
if feedData.feed.has_key('title'): # noqa
self.title = feedData.feed.title
self.log("parse_index: Title updated to: ", self.title)
if feedData.feed.has_key('image'): # noqa
self.log("HAS IMAGE!!!!")
def get_virtual_feed_articles(feed): def get_virtual_feed_articles(feed):
if feeds.has_key(feed): # noqa if feed in feeds:
return feeds[feed][1] return feeds[feed][1]
self.log("Adding new feed: ", feed) self.log("Adding new feed: ", feed)
articles = [] articles = []
@ -84,7 +79,7 @@ class LentaRURecipe(BasicNewsRecipe):
continue continue
article = {'title': title, 'url': link, 'description': item.get( article = {'title': title, 'url': link, 'description': item.get(
'description', ''), 'date': item.get('date', ''), 'content': ''} 'description', ''), 'date': item.get('date', ''), 'content': ''}
if not item.has_key('tags'): # noqa if not item.get('tags'):
get_virtual_feed_articles('_default').append(article) get_virtual_feed_articles('_default').append(article)
continue continue
for tag in item.tags: for tag in item.tags:
@ -101,7 +96,7 @@ class LentaRURecipe(BasicNewsRecipe):
# Select sorted feeds first of all # Select sorted feeds first of all
result = [] result = []
for feedName in self.sortOrder: for feedName in self.sortOrder:
if (not feeds.has_key(feedName)): # noqa if (not feeds.get(feedName)):
continue continue
result.append(feeds[feedName]) result.append(feeds[feedName])
del feeds[feedName] del feeds[feedName]
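feedparser entries are dict-like, so the same .get() and setdefault patterns apply to them; a hypothetical sketch of the tag-grouping logic above (the feed URL is illustrative):

import feedparser

d = feedparser.parse('https://example.com/rss')   # illustrative URL
feeds = {}

for item in d.entries:
    article = {'title': item.get('title', ''), 'url': item.get('link', ''),
               'description': item.get('description', ''), 'content': ''}
    if not item.get('tags'):                      # replaces item.has_key('tags')
        feeds.setdefault('_default', []).append(article)
        continue
    for tag in item.tags:
        feeds.setdefault(tag.term, []).append(article)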

View File

@ -45,7 +45,7 @@ class LondonReviewOfBooksPayed(BasicNewsRecipe):
soup = self.index_to_soup(self.INDEX) soup = self.index_to_soup(self.INDEX)
cover_item = soup.find('p', attrs={'class': 'cover'}) cover_item = soup.find('p', attrs={'class': 'cover'})
dates = str(soup.find('span', attrs={'class': 'coverdate'})) dates = str(soup.find('span', attrs={'class': 'coverdate'}))
newdates = re.sub('\<.*\>', '', re.split('<br />', dates)[1]) newdates = re.sub(r'\<.*\>', '', re.split('<br />', dates)[1])
self.timefmt = ' [%s]' % newdates self.timefmt = ' [%s]' % newdates
lrbtitle = self.title lrbtitle = self.title
if cover_item: if cover_item:
@ -58,13 +58,13 @@ class LondonReviewOfBooksPayed(BasicNewsRecipe):
description = u'' description = u''
title_prefix = u'' title_prefix = u''
feed_link = item feed_link = item
if feed_link.has_key('href'): # noqa if feed_link.get('href'):
url = self.INDEX + feed_link['href'] url = self.INDEX + feed_link['href']
title_link = re.split('<br />', str(feed_link)) title_link = re.split('<br />', str(feed_link))
if len(title_link) > 1: if len(title_link) > 1:
title = title_prefix + \ title = title_prefix + \
re.sub( re.sub(
'\<.*\>', '', title_link[0]) + ' - ' + re.sub('\<.*\>', '', title_link[1]) r'\<.*\>', '', title_link[0]) + ' - ' + re.sub(r'\<.*\>', '', title_link[1])
else: else:
title = title_prefix + self.tag_to_string(feed_link) title = title_prefix + self.tag_to_string(feed_link)
desc = item.findNext('li') desc = item.findNext('li')

View File

@ -133,7 +133,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
class MerryPreProcess(): class MerryPreProcess():
def optimizePicture(self, soup): def optimizePicture(self, soup):
for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')): # noqa for tag in soup.findAll('img', src=True):
try: try:
iurl = tag['src'] iurl = tag['src']
img = Image() img = Image()

View File

@ -42,11 +42,10 @@ class MoneyControlRecipe(BasicNewsRecipe):
h1.append(self.tag_to_string(headline)) h1.append(self.tag_to_string(headline))
freshSoup.body.append(h1) freshSoup.body.append(h1)
for p in soup.findAll('p'): for p in soup.findAll('p', attrs={'class': True}):
if p.has_key('class'): if p['class'] == 'MsoNormal':
if p['class'] == 'MsoNormal': # We have some weird pagebreak marker here; it will not find all of them however
# We have some weird pagebreak marker here; it will not find all of them however continue
continue
para = Tag(freshSoup, 'p') para = Tag(freshSoup, 'p')
# Convert to string; this will lose all formatting but also all illegal markup # Convert to string; this will lose all formatting but also all illegal markup

View File

@ -286,7 +286,7 @@ class CanWestPaper(BasicNewsRecipe):
else: else:
description = self.tag_to_string(dtag, False) description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description) print("DESCRIPTION: " + description)
if not articles.has_key(key): # noqa if key not in articles:
articles[key] = [] articles[key] = []
articles[key].append(dict( articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content='')) title=title, url=url, date='', description=description, author='', content=''))
@ -310,5 +310,5 @@ class CanWestPaper(BasicNewsRecipe):
for (k, url) in self.postmedia_index_pages: for (k, url) in self.postmedia_index_pages:
parse_web_index(k, url) parse_web_index(k, url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa ans = [(key, articles[key]) for key in ans if key in articles]
return ans return ans

View File

@ -38,7 +38,7 @@ class naszdziennik(BasicNewsRecipe):
section = self.tag_to_string(section) section = self.tag_to_string(section)
# check whether the articles dict already has a key for this section # check whether the articles dict already has a key for this section
# if it does not: # if it does not:
if not articles.has_key(section): # noqa if section not in articles:
# append the new section to the list of sections # append the new section to the list of sections
sections.append(section) sections.append(section)
# declare the new section in the articles dict, assigning it # declare the new section in the articles dict, assigning it

View File

@ -77,7 +77,7 @@ class NrcNextRecipe(BasicNewsRecipe):
# Add the article to a temporary list # Add the article to a temporary list
article = {'title': completeTitle, 'date': u'', article = {'title': completeTitle, 'date': u'',
'url': href, 'description': '<p>&nbsp;</p>'} 'url': href, 'description': '<p>&nbsp;</p>'}
if not articles.has_key(index): # noqa if index not in articles:
articles[index] = [] articles[index] = []
articles[index].append(article) articles[index].append(article)
@ -90,7 +90,7 @@ class NrcNextRecipe(BasicNewsRecipe):
indices, {u'columnisten': 1, u'koken': 3, u'geld & werk': 2, u'vandaag': 0}) indices, {u'columnisten': 1, u'koken': 3, u'geld & werk': 2, u'vandaag': 0})
# Apply this sort order to the actual list of feeds and articles # Apply this sort order to the actual list of feeds and articles
answer = [(key, articles[key]) answer = [(key, articles[key])
for key in indices if articles.has_key(key)] # noqa for key in indices if key in articles]
return answer return answer

View File

@ -201,8 +201,7 @@ class Newsweek(BasicNewsRecipe):
self.DATE = matches.group(0) self.DATE = matches.group(0)
# cover # cover
img = main_section.find(lambda tag: tag.name == 'img' and tag.has_key( # noqa img = main_section.find('img', src=True, alt=True, title=True)
'alt') and tag.has_key('title'))
self.cover_url = img['src'] self.cover_url = img['src']
feeds = [] feeds = []
articles = {} articles = {}
@ -233,7 +232,7 @@ class Newsweek(BasicNewsRecipe):
if article is None: if article is None:
continue continue
if articles.has_key(section): # noqa if section in articles:
articles[section].append(article) articles[section].append(article)
else: else:
articles[section] = [article] articles[section] = [article]

View File

@ -82,7 +82,6 @@ class Novosti(BasicNewsRecipe):
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)
for item in soup.findAll('img'): for item in soup.findAll('img', alt=False):
if not item.has_key('alt'): # noqa item['alt'] = 'image'
item['alt'] = 'image'
return soup return soup

View File

@ -55,8 +55,8 @@ class NotSafeForWork(BasicNewsRecipe):
def get_feeds(self): def get_feeds(self):
self.feeds = [] self.feeds = []
soup = self.index_to_soup(self.SETTINGS) soup = self.index_to_soup(self.SETTINGS)
for item in soup.findAll('input', attrs={'type': 'text'}): for item in soup.findAll('input', value=True, attrs={'type': 'text'}):
if item.has_key('value') and item['value'].startswith('https://www.nsfwcorp.com/feed/'): # noqa if item['value'].startswith('https://www.nsfwcorp.com/feed/'):
self.feeds.append(item['value']) self.feeds.append(item['value'])
return self.feeds return self.feeds
return self.feeds return self.feeds

View File

@ -109,7 +109,6 @@ class Nspm(BasicNewsRecipe):
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)
for item in soup.findAll('img'): for item in soup.findAll('img', alt=False):
if not item.has_key('alt'): # noqa item['alt'] = 'image'
item['alt'] = 'image'
return soup return soup

View File

@ -286,7 +286,7 @@ class CanWestPaper(BasicNewsRecipe):
else: else:
description = self.tag_to_string(dtag, False) description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description) print("DESCRIPTION: " + description)
if not articles.has_key(key): # noqa if key not in articles:
articles[key] = [] articles[key] = []
articles[key].append(dict( articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content='')) title=title, url=url, date='', description=description, author='', content=''))
@ -310,5 +310,5 @@ class CanWestPaper(BasicNewsRecipe):
for (k, url) in self.postmedia_index_pages: for (k, url) in self.postmedia_index_pages:
parse_web_index(k, url) parse_web_index(k, url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa ans = [(key, articles[key]) for key in ans if key in articles]
return ans return ans

View File

@ -86,8 +86,8 @@ class Pobjeda(BasicNewsRecipe):
soup = self.index_to_soup(feedurl) soup = self.index_to_soup(feedurl)
for item in soup.findAll('div', attrs={'class': 'vijest'}): for item in soup.findAll('div', attrs={'class': 'vijest'}):
description = self.tag_to_string(item.h2) description = self.tag_to_string(item.h2)
atag = item.h1.find('a') atag = item.h1.find('a', href=True)
if atag and atag.has_key('href'): # noqa if atag is not None:
url = self.INDEX + '/' + atag['href'] url = self.INDEX + '/' + atag['href']
title = self.tag_to_string(atag) title = self.tag_to_string(atag)
date = strftime(self.timefmt) date = strftime(self.timefmt)

View File

@ -58,10 +58,9 @@ class Politika(BasicNewsRecipe):
del item['style'] del item['style']
for item in soup.findAll('a', attrs={'class': 'category'}): for item in soup.findAll('a', attrs={'class': 'category'}):
item.name = 'span' item.name = 'span'
if item.has_key('href'): # noqa item['href'] = item['title'] = ''
del item['href'] del item['href']
if item.has_key('title'): # noqa del item['title']
del item['title']
return soup return soup
def get_cover_url(self): def get_cover_url(self):

View File

@ -51,7 +51,7 @@ class Polityka(BasicNewsRecipe):
'http://archiwum.polityka.pl' + div.a['href'],) 'http://archiwum.polityka.pl' + div.a['href'],)
section = self.tag_to_string(article_page.find( section = self.tag_to_string(article_page.find(
'h2', attrs={'class': 'box_nag'})).split('/')[0].lstrip().rstrip() 'h2', attrs={'class': 'box_nag'})).split('/')[0].lstrip().rstrip()
if not articles.has_key(section): # noqa if section not in articles:
articles[section] = [] articles[section] = []
articles[section].append({ articles[section].append({
'title': self.tag_to_string(div.a), 'title': self.tag_to_string(div.a),

View File

@ -190,7 +190,7 @@ class CanWestPaper(BasicNewsRecipe):
# Find each instance of class="sectiontitle", class="featurecontent" # Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}): for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
if divtag['class'].startswith('section_title'): if divtag['class'].startswith('section_title'):
# div contains section title # div contains section title
if not divtag.h3: if not divtag.h3:
continue continue
key = self.tag_to_string(divtag.h3, False) key = self.tag_to_string(divtag.h3, False)
@ -215,11 +215,11 @@ class CanWestPaper(BasicNewsRecipe):
autag = divtag.find('h4') autag = divtag.find('h4')
if autag: if autag:
author = self.tag_to_string(autag, False) author = self.tag_to_string(autag, False)
if not articles.has_key(key): # noqa if key not in articles:
articles[key] = [] articles[key] = []
articles[key].append(dict(title=title, url=url, date=pubdate, articles[key].append(dict(title=title, url=url, date=pubdate,
description=description, author=author, content='')) description=description, author=author, content=''))
ans = [(keyl, articles[keyl]) ans = [(keyl, articles[keyl])
for keyl in ans if articles.has_key(keyl)] # noqa for keyl in ans if keyl in articles]
return ans return ans

View File

@ -46,8 +46,8 @@ class Republika(BasicNewsRecipe):
for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
item.name = 'div' item.name = 'div'
for attrib in attribs: for attrib in attribs:
if item.has_key(attrib): # noqa item[attrib] = ''
del item[attrib] del item[attrib]
return soup return soup
def parse_index(self): def parse_index(self):

View File

@ -190,7 +190,7 @@ class CanWestPaper(BasicNewsRecipe):
# Find each instance of class="sectiontitle", class="featurecontent" # Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}): for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
if divtag['class'].startswith('section_title'): if divtag['class'].startswith('section_title'):
# div contains section title # div contains section title
if not divtag.h3: if not divtag.h3:
continue continue
key = self.tag_to_string(divtag.h3, False) key = self.tag_to_string(divtag.h3, False)
@ -215,10 +215,10 @@ class CanWestPaper(BasicNewsRecipe):
autag = divtag.find('h4') autag = divtag.find('h4')
if autag: if autag:
author = self.tag_to_string(autag, False) author = self.tag_to_string(autag, False)
if not articles.has_key(key): # noqa if key not in articles:
articles[key] = [] articles[key] = []
articles[key].append(dict(title=title, url=url, date=pubdate, articles[key].append(dict(title=title, url=url, date=pubdate,
description=description, author=author, content='')) description=description, author=author, content=''))
ans = [(k, articles[k]) for k in ans if articles.has_key(k)] # noqa ans = [(k, articles[k]) for k in ans if k in articles]
return ans return ans

View File

@ -45,11 +45,11 @@ class SCPrintMagazine(BasicNewsRecipe):
if arttitlet is not None: if arttitlet is not None:
mylink = arttitlet.find('a') mylink = arttitlet.find('a')
if mylink is not None: if mylink is not None:
if mylink.has_key('title'): # noqa if mylink.get('title'):
arttitle = mylink['title'] arttitle = mylink['title']
else: else:
arttitle = 'unknown' arttitle = 'unknown'
if mylink.has_key('href'): # noqa if mylink.get('href'):
artlink = mylink['href'] artlink = mylink['href']
artlink = artlink.replace( artlink = artlink.replace(
"/article", "/printarticle") "/article", "/printarticle")

View File

@ -81,7 +81,7 @@ class SZmobil(BasicNewsRecipe):
if itt['href'].startswith('article.php?id='): if itt['href'].startswith('article.php?id='):
article_url = itt['href'] article_url = itt['href']
article_id = int( article_id = int(
re.search("id=(\d*)&etag=", itt['href']).group(1)) re.search(r"id=(\d*)&etag=", itt['href']).group(1))
# first check if link is a special article in section # first check if link is a special article in section
# "Meinungsseite" # "Meinungsseite"
@ -104,7 +104,7 @@ class SZmobil(BasicNewsRecipe):
# just another link ("mehr") to an article # just another link ("mehr") to an article
continue continue
if itt.has_key('id'): # noqa if itt.get('id') is not None:
shorttitles[article_id] = article_name shorttitles[article_id] = article_name
else: else:
articles.append( articles.append(
@ -118,7 +118,7 @@ class SZmobil(BasicNewsRecipe):
# pubdate = strftime('') # pubdate = strftime('')
pubdate = strftime('[%a, %d %b]') pubdate = strftime('[%a, %d %b]')
description = '' description = ''
if shorttitles.has_key(article_id): # noqa if shorttitles.get(article_id) is not None:
description = shorttitles[article_id] description = shorttitles[article_id]
# we do not want the flag ("Impressum") # we do not want the flag ("Impressum")
if "HERAUSGEGEBEN VOM" in description: if "HERAUSGEGEBEN VOM" in description:

View File

@ -55,7 +55,6 @@ class TechCrunch(BasicNewsRecipe):
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)
for item in soup.findAll('img'): for item in soup.findAll('img', alt=False):
if not item.has_key('alt'): # noqa item['alt'] = 'image'
item['alt'] = 'image'
return soup return soup

View File

@ -45,7 +45,7 @@ class TheAge(BasicNewsRecipe):
# Make sure to skip: <a href="/">TheAge</a> # Make sure to skip: <a href="/">TheAge</a>
elif section and tag.has_key('href') and len(tag['href'].strip()) > 1: # noqa elif section and tag.get('href') and len(tag['href'].strip()) > 1:
url = tag['href'].strip() url = tag['href'].strip()
if url.startswith('/'): if url.startswith('/'):
url = 'http://www.theage.com.au' + url url = 'http://www.theage.com.au' + url
@ -105,7 +105,7 @@ class TheAge(BasicNewsRecipe):
# Filter out what's left of the text-mode navigation stuff # Filter out what's left of the text-mode navigation stuff
if re.match('((\s)|(\&nbsp\;))*\[[\|\s*]*\]((\s)|(\&nbsp\;))*$', contents): if re.match(r'((\s)|(\&nbsp\;))*\[[\|\s*]*\]((\s)|(\&nbsp\;))*$', contents):
p.extract() p.extract()
continue continue

View File

@ -47,6 +47,6 @@ class Edgesingapore(BasicNewsRecipe):
for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']): for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
item.name = 'div' item.name = 'div'
for attrib in attribs: for attrib in attribs:
if item.has_key(attrib): # noqa item[attrib] = ''
del item[attrib] del item[attrib]
return self.adeify_images(soup) return self.adeify_images(soup)

View File

@ -78,12 +78,12 @@ class TheOnion(BasicNewsRecipe):
if limg: if limg:
item.name = 'div' item.name = 'div'
item.attrs = [] item.attrs = []
if not limg.has_key('alt'): # noqa if not limg.get('alt'):
limg['alt'] = 'image' limg['alt'] = 'image'
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)
for item in soup.findAll('img'): for item in soup.findAll('img'):
if item.has_key('data-src'): # noqa if item.get('data-src'):
item['src'] = item['data-src'] item['src'] = item['data-src']
return soup return soup
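The data-src swap above is the usual lazy-image fix, now guarded with .get() instead of has_key(); a short sketch:

from bs4 import BeautifulSoup

soup = BeautifulSoup('<img data-src="real.jpg" src="placeholder.gif">', 'html.parser')

for item in soup.findAll('img'):
    if item.get('data-src'):            # replaces item.has_key('data-src')
        item['src'] = item['data-src']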

View File

@ -57,8 +57,8 @@ class Tomshardware(BasicNewsRecipe):
def cleanup_image_tags(self, soup): def cleanup_image_tags(self, soup):
for item in soup.findAll('img'): for item in soup.findAll('img'):
for attrib in ['height', 'width', 'border', 'align']: for attrib in ['height', 'width', 'border', 'align']:
if item.has_key(attrib): # noqa item[attrib] = ''
del item[attrib] del item[attrib]
return soup return soup
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@ -45,7 +45,6 @@ class Twitchfilm(BasicNewsRecipe):
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)
for item in soup.findAll('img'): for item in soup.findAll('img', alt=False):
if not item.has_key('alt'): # noqa item['alt'] = 'image'
item['alt'] = 'image'
return soup return soup

View File

@ -299,7 +299,7 @@ class CanWestPaper(BasicNewsRecipe):
else: else:
description = self.tag_to_string(dtag, False) description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description) print("DESCRIPTION: " + description)
if not articles.has_key(key): # noqa if key not in articles:
articles[key] = [] articles[key] = []
articles[key].append(dict( articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content='')) title=title, url=url, date='', description=description, author='', content=''))
@ -323,5 +323,5 @@ class CanWestPaper(BasicNewsRecipe):
for (k, url) in self.postmedia_index_pages: for (k, url) in self.postmedia_index_pages:
parse_web_index(k, url) parse_web_index(k, url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa ans = [(key, articles[key]) for key in ans if key in articles]
return ans return ans

View File

@ -287,7 +287,7 @@ class CanWestPaper(BasicNewsRecipe):
else: else:
description = self.tag_to_string(dtag, False) description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description) print("DESCRIPTION: " + description)
if not articles.has_key(key): # noqa if key not in articles:
articles[key] = [] articles[key] = []
articles[key].append(dict( articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content='')) title=title, url=url, date='', description=description, author='', content=''))
@ -311,5 +311,5 @@ class CanWestPaper(BasicNewsRecipe):
for (k, url) in self.postmedia_index_pages: for (k, url) in self.postmedia_index_pages:
parse_web_index(k, url) parse_web_index(k, url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa ans = [(key, articles[key]) for key in ans if key in articles]
return ans return ans

View File

@ -62,7 +62,6 @@ class Variety(BasicNewsRecipe):
else: else:
str = self.tag_to_string(item) str = self.tag_to_string(item)
item.replaceWith(str) item.replaceWith(str)
for item in soup.findAll('img'): for item in soup.findAll('img', alt=False):
if not item.has_key('alt'): # noqa item['alt'] = 'image'
item['alt'] = 'image'
return soup return soup

View File

@ -62,16 +62,22 @@ class VedomostiRecipe(BasicNewsRecipe):
if not feedData: if not feedData:
raise NotImplementedError raise NotImplementedError
self.log("parse_index: Feed loaded successfully.") self.log("parse_index: Feed loaded successfully.")
if feedData.feed.has_key('title'): # noqa try:
self.title = feedData.feed.title if feedData.feed.title:
self.log("parse_index: Title updated to: ", self.title) self.title = feedData.feed.title
if feedData.feed.has_key('description'): # noqa self.log("parse_index: Title updated to: ", self.title)
self.description = feedData.feed.description except Exception:
self.log("parse_index: Description updated to: ", pass
self.description) try:
if feedData.feed.description:
self.description = feedData.feed.description
self.log("parse_index: Description updated to: ",
self.description)
except Exception:
pass
def get_virtual_feed_articles(feed): def get_virtual_feed_articles(feed):
if feeds.has_key(feed): # noqa if feed in feeds:
return feeds[feed][1] return feeds[feed][1]
self.log("Adding new feed: ", feed) self.log("Adding new feed: ", feed)
articles = [] articles = []
@ -88,7 +94,7 @@ class VedomostiRecipe(BasicNewsRecipe):
continue continue
article = {'title': title, 'url': link, 'description': item.get( article = {'title': title, 'url': link, 'description': item.get(
'description', ''), 'date': item.get('date', ''), 'content': ''} 'description', ''), 'date': item.get('date', ''), 'content': ''}
if not item.has_key('tags'): # noqa if not item.get('tags'):
get_virtual_feed_articles('_default').append(article) get_virtual_feed_articles('_default').append(article)
continue continue
for tag in item.tags: for tag in item.tags:
@ -105,7 +111,7 @@ class VedomostiRecipe(BasicNewsRecipe):
# Select sorted feeds first of all # Select sorted feeds first of all
result = [] result = []
for feedName in self.sortOrder: for feedName in self.sortOrder:
if (not feeds.has_key(feedName)): # noqa if (not feeds.get(feedName)):
continue continue
result.append(feeds[feedName]) result.append(feeds[feedName])
del feeds[feedName] del feeds[feedName]
@ -142,9 +148,9 @@ class VedomostiRecipe(BasicNewsRecipe):
imgDiv = Tag(soup, 'div') imgDiv = Tag(soup, 'div')
imgDiv['class'] = 'article_img' imgDiv['class'] = 'article_img'
if img.has_key('width'): # noqa if img.get('width'):
del(img['width']) del(img['width'])
if img.has_key('height'): # noqa if img.get('height'):
del(img['height']) del(img['height'])
# find description # find description
@ -180,11 +186,9 @@ class VedomostiRecipe(BasicNewsRecipe):
contents.insert(len(contents.contents), authorsP) contents.insert(len(contents.contents), authorsP)
# Fix urls that use relative path # Fix urls that use relative path
urls = contents.findAll('a') urls = contents.findAll('a', href=True)
if urls: if urls:
for url in urls: for url in urls:
if not url.has_key('href'): # noqa
continue
if '/' == url['href'][0]: if '/' == url['href'][0]:
url['href'] = self.base_url + url['href'] url['href'] = self.base_url + url['href']

View File

@ -94,8 +94,8 @@ class WaPoCartoonsRecipe(BasicNewsRecipe):
img = soup.find('img', attrs={'class': 'pic_big'}) img = soup.find('img', attrs={'class': 'pic_big'})
if img: if img:
td = img.parent td = img.parent
if td.has_key('style'): # noqa td['style'] = ''
del td['style'] del td['style']
td.name = 'div' td.name = 'div'
td['id'] = 'comic_full' td['id'] = 'comic_full'
freshSoup.body.append(td) freshSoup.body.append(td)
@ -134,11 +134,8 @@ class WaPoCartoonsRecipe(BasicNewsRecipe):
'June': '06', 'July': '07', 'August': '08', 'September': '09', 'October': '10', 'June': '06', 'July': '07', 'August': '08', 'September': '09', 'October': '10',
'November': '11', 'December': '12'} 'November': '11', 'December': '12'}
opts = select.findAll('option') opts = select.findAll('option', selected=False)
for i in range(1, len(opts)): for i in range(1, len(opts)):
if opts[i].has_key('selected'): # noqa
continue
dateString = self.tag_to_string(opts[i]) dateString = self.tag_to_string(opts[i])
rest, sep, year = dateString.rpartition(', ') rest, sep, year = dateString.rpartition(', ')
parts = rest.split(' ') parts = rest.split(' ')

View File

@ -42,11 +42,11 @@ class TheCND(BasicNewsRecipe):
url = 'http://bbs.wenxuecity.com' + url url = 'http://bbs.wenxuecity.com' + url
title = self.tag_to_string(a) title = self.tag_to_string(a)
self.log('\tFound article: ', title, ' at:', url) self.log('\tFound article: ', title, ' at:', url)
dateReg = re.search('(\d\d?)/(\d\d?)/(\d\d)', dateReg = re.search(r'(\d\d?)/(\d\d?)/(\d\d)',
self.tag_to_string(a.parent)) self.tag_to_string(a.parent))
date = '%(y)s/%(m)02d/%(d)02d' % {'y': dateReg.group(3), date = '%(y)s/%(m)02d/%(d)02d' % {'y': dateReg.group(3),
'm': int(dateReg.group(1)), 'd': int(dateReg.group(2))} 'm': int(dateReg.group(1)), 'd': int(dateReg.group(2))}
if not articles.has_key(date): # noqa if date not in articles:
articles[date] = [] articles[date] = []
articles[date].append( articles[date].append(
{'title': title, 'url': url, 'description': '', 'date': ''}) {'title': title, 'url': url, 'description': '', 'date': ''})

View File

@ -96,10 +96,10 @@ class CanWestPaper(BasicNewsRecipe):
autag = divtag.find('h4') autag = divtag.find('h4')
if autag: if autag:
author = self.tag_to_string(autag, False) author = self.tag_to_string(autag, False)
if not articles.has_key(key): # noqa if key not in articles:
articles[key] = [] articles[key] = []
articles[key].append(dict(title=title, url=url, date=pubdate, articles[key].append(dict(title=title, url=url, date=pubdate,
description=description, author=author, content='')) description=description, author=author, content=''))
ans = [(keyl, articles[key]) for keyl in ans if articles.has_key(keyl)] # noqa ans = [(keyl, articles[keyl]) for keyl in ans if keyl in articles]
return ans return ans

View File

@ -189,9 +189,9 @@ class CanWestPaper(BasicNewsRecipe):
# Find each instance of class="sectiontitle", class="featurecontent" # Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}): for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
# self.log(" div class = %s" % divtag['class']) # self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'): if divtag['class'].startswith('section_title'):
# div contains section title # div contains section title
if not divtag.h3: if not divtag.h3:
continue continue
key = self.tag_to_string(divtag.h3, False) key = self.tag_to_string(divtag.h3, False)
@ -221,11 +221,11 @@ class CanWestPaper(BasicNewsRecipe):
if autag: if autag:
author = self.tag_to_string(autag, False) author = self.tag_to_string(autag, False)
# self.log("author %s" % author) # self.log("author %s" % author)
if not articles.has_key(key): # noqa if key not in articles:
articles[key] = [] articles[key] = []
articles[key].append(dict(title=title, url=url, date=pubdate, articles[key].append(dict(title=title, url=url, date=pubdate,
description=description, author=author, content='')) description=description, author=author, content=''))
ans = [(keyl, articles[keyl]) ans = [(keyl, articles[keyl])
for keyl in ans if articles.has_key(keyl)] # noqa for keyl in ans if keyl in articles]
return ans return ans

View File

@ -83,11 +83,10 @@ class ZAOBAO(BasicNewsRecipe):
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
for tag in soup.findAll(name='a'): for tag in soup.findAll(name='a', href=True):
if tag.has_key('href'): # noqa tag_url = tag['href']
tag_url = tag['href'] if tag_url.find('http://') != -1 and tag_url.find('zaobao.com') == -1:
if tag_url.find('http://') != -1 and tag_url.find('zaobao.com') == -1: del tag['href']
del tag['href']
return soup return soup
def postprocess_html(self, soup, first): def postprocess_html(self, soup, first):
@ -107,8 +106,8 @@ class ZAOBAO(BasicNewsRecipe):
for i, item in enumerate(soup.findAll('li')): for i, item in enumerate(soup.findAll('li')):
if i >= self.MAX_ITEMS_IN_INDEX: if i >= self.MAX_ITEMS_IN_INDEX:
break break
a = item.find('a') a = item.find('a', href=True)
if a and a.has_key('href'): # noqa if a is not None:
a_url = a['href'] a_url = a['href']
a_title = self.tag_to_string(a) a_title = self.tag_to_string(a)
date = '' date = ''