Mirror of https://github.com/kovidgoyal/calibre.git
Get rid of has_key from all recipes
commit 5755625d1e · parent 3c86c9be6f
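The hunks below repeat three substitutions for the Python 2-only has_key() method. A minimal sketch of each pattern follows (the markup, the articles dict and the example URL are illustrative only, and bs4 stands in here for the BeautifulSoup build that calibre recipes actually import):

    from bs4 import BeautifulSoup  # calibre bundles its own BeautifulSoup; bs4 is used only for this sketch

    # 1) Plain dict membership: d.has_key(k) becomes k in d
    articles = {}
    feed = 'Uncategorized'
    if feed not in articles:
        articles[feed] = []

    soup = BeautifulSoup('<p><img src="x.png"/><a href="/story">story</a></p>', 'html.parser')

    # 2) Tag attribute checks: tag.has_key('alt') becomes tag.get('alt'),
    #    or the attribute test is pushed into the search itself with findAll(..., alt=False)
    for img in soup.findAll('img', alt=False):
        img['alt'] = 'image'

    # 3) has_key() inside a search lambda becomes an attribute filter such as href=True
    for a in soup.findAll('a', href=True):
        if not a['href'].startswith('http'):
            a['href'] = 'http://example.com' + a['href']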
@@ -217,7 +217,7 @@ A reasonably complex real life example that exposes more of the :term:`API` of `
description = self.tag_to_string(summary, use_alt=False)

feed = key if key is not None else 'Uncategorized'
-if not articles.has_key(feed):
+if feed not in articles:
articles[feed] = []
if not 'podcasts' in url:
articles[feed].append(
@@ -225,7 +225,7 @@ A reasonably complex real life example that exposes more of the :term:`API` of `
description=description,
content=''))
ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
-ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
+ans = [(key, articles[key]) for key in ans if key in articles]
return ans

def preprocess_html(self, soup):
@@ -60,7 +60,6 @@ class t20Minutos(BasicNewsRecipe):
else:
str = self.tag_to_string(item)
item.replaceWith(str)
-for item in soup.findAll('img'):
-if not item.has_key('alt'): # noqa
-item['alt'] = 'image'
+for item in soup.findAll('img', alt=False):
+item['alt'] = 'image'
return soup
@@ -112,7 +112,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
for div in soup.findAll('div', attrs={'class': re.compile(self.author_reg_exp, re.IGNORECASE)}):
div.extract()
for auth in div.findAll('a'):
-if (auth.has_key('class') and auth['class'] == 'cm-source-image'): # noqa
+if auth.get('class') == 'cm-source-image':
continue
names = names + comma + auth.contents[0]
comma = ', '
@@ -25,7 +25,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
def generic_parse(self, soup):
articles = []
# soup.findAll('li', 'hentry'):
-for entry in soup.findAll(lambda tag: tag.name == 'li' and tag.has_key('class') and tag['class'].find('hentry') != -1): # noqa
+for entry in soup.findAll('li', attrs={'class': lambda x: x and 'hentry' in x}):
article_url = entry.a['href'] + '?print=yes'
article_title = entry.find('h3', 'entry-title')
article_title = self.tag_to_string(article_title)
@@ -48,7 +48,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):

def plumaje_parse(self, soup):
articles = []
-blogs_soup = soup.find(lambda tag: tag.name == 'ul' and tag.has_key('class') and tag['class'].find('bloglist-fecha') != -1) # noqa
+blogs_soup = soup.find('ul', attrs={'class': lambda x: x and 'bloglist-fecha' in x})
for entry in blogs_soup.findAll('li'):
article_title = entry.p
article_url = article_title.a['href'] + '?print=yes'
@@ -69,7 +69,7 @@ class AdvancedUserRecipe1290663986(BasicNewsRecipe):
def boca_parse(self, soup):
articles = []
# soup.findAll('li', 'hentry'):
-for entry in soup.findAll(lambda tag: tag.name == 'div' and tag.has_key('class') and tag['class'].find('hentry') != -1): # noqa
+for entry in soup.findAll('div', attrs={'class': lambda x: x and 'hentry' in x}):
article_title = entry.find('h2', 'entry-title')
article_url = article_title.a['href'] + '?print=yes'
article_title = self.tag_to_string(article_title)
@@ -62,8 +62,8 @@ class BuenosAiresHerald(BasicNewsRecipe):
soup = self.index_to_soup(feedurl)
for item in soup.findAll('div', attrs={'class': 'nota_texto_seccion'}):
description = self.tag_to_string(item.h2)
-atag = item.h2.find('a')
-if atag and atag.has_key('href'): # noqa
+atag = item.h2.find('a', href=True)
+if atag is not None:
url = self.INDEX + atag['href']
title = description
date = strftime(self.timefmt)
@@ -50,8 +50,8 @@ class BenchmarkPl(BasicNewsRecipe):

def preprocess_html(self, soup):
self.append_page(soup, soup.body)
-for a in soup('a'):
-if a.has_key('href') and not a['href'].startswith('http'): # noqa
+for a in soup.findAll('a', href=True):
+if not a['href'].startswith('http'):
a['href'] = self.INDEX + a['href']
for r in soup.findAll(attrs={'class': ['comments', 'body']}):
r.extract()
@@ -55,7 +55,6 @@ class BigHollywood(BasicNewsRecipe):
else:
str = self.tag_to_string(item)
item.replaceWith(str)
-for item in soup.findAll('img'):
-if not item.has_key('alt'): # noqa
-item['alt'] = 'image'
+for item in soup.findAll('img', alt=False):
+item['alt'] = 'image'
return soup
@@ -59,7 +59,6 @@ class Business_insider(BasicNewsRecipe):
if item.string is not None:
tstr = item.string
item.replaceWith(tstr)
-for item in soup.findAll('img'):
-if not item.has_key('alt'): # noqa
-item['alt'] = 'image'
+for item in soup.findAll('img', alt=False):
+item['alt'] = 'image'
return soup
@@ -286,7 +286,7 @@ class CanWestPaper(BasicNewsRecipe):
else:
description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description)
-if not articles.has_key(key): # noqa
+if key not in articles:
articles[key] = []
articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content=''))
@@ -310,5 +310,5 @@ class CanWestPaper(BasicNewsRecipe):

for (k, url) in self.postmedia_index_pages:
parse_web_index(k, url)
-ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa
+ans = [(key, articles[key]) for key in ans if key in articles]
return ans
@@ -22,7 +22,7 @@ class CD_Action(BasicNewsRecipe):
return getattr(self, 'cover_url', self.cover_url)

def preprocess_html(self, soup):
-for a in soup('a'):
-if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: # noqa
+for a in soup.findAll('a', href=True):
+if 'http://' not in a['href'] and 'https://' not in a['href']:
a['href'] = self.index + a['href']
return soup
@@ -101,10 +101,7 @@ class CSMonitor(BasicNewsRecipe):
else:
str = self.tag_to_string(item)
item.replaceWith(str)
-for item in soup.findAll('img'):
+for item in soup.findAll('img', src=True):
if 'scorecardresearch' in item['src']:
item.extract()
-else:
-if not item.has_key('alt'): # noqa
-item['alt'] = 'image'
return soup
@@ -36,10 +36,9 @@ class Cinebel(BasicNewsRecipe):
]

def preprocess_html(self, soup):
-for alink in soup.findAll('a'):
-if alink.has_key('href'): # noqa
-tstr = "Site officiel: " + alink['href']
-alink.replaceWith(tstr)
+for alink in soup.findAll('a', href=True):
+tstr = "Site officiel: " + alink['href']
+alink.replaceWith(tstr)
return soup

def get_cover_url(self):
@@ -131,12 +131,12 @@ class CIO_Magazine(BasicNewsRecipe):

# Esto esta copiado del NY times
feed = key if key is not None else 'Uncategorized'
-if not articles.has_key(feed): # noqa
+if feed not in articles:
articles[feed] = []
if 'podcasts' not in url:
articles[feed].append(
dict(title=title, url=url, date=pubdate,
description=description,
content=''))
-feeds = [(k, articles[k]) for k in feeds if articles.has_key(k)] # noqa
+feeds = [(k, articles[k]) for k in feeds if k in articles]
return feeds
@@ -54,7 +54,7 @@ class TheCND(BasicNewsRecipe):
if re.search('cm', date):
continue
if (date is not None) and len(date) > 2:
-if not articles.has_key(date): # noqa
+if date not in articles:
articles[date] = []
articles[date].append(
{'title': title, 'url': url, 'description': '', 'date': ''})
@@ -54,7 +54,7 @@ class TheCND(BasicNewsRecipe):
continue
self.log('\tFound article: ', title, 'at', url, '@', date)
if (date is not None) and len(date) > 2:
-if not articles.has_key(date): # noqa
+if date not in articles:
articles[date] = []
articles[date].append(
{'title': title, 'url': url, 'description': '', 'date': ''})
@@ -53,8 +53,8 @@ class General(BasicNewsRecipe):
for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
item.name = 'div'
for attrib in attribs:
-if item.has_key(attrib): # noqa
-del item[attrib]
+item[attrib] = ''
+del item[attrib]
return soup

def get_cover_url(self):
@@ -46,7 +46,6 @@ class CubaDebate(BasicNewsRecipe):
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
-for item in soup.findAll('img'):
-if not item.has_key('alt'): # noqa
-item['alt'] = 'image'
+for item in soup.findAll('img', alt=False):
+item['alt'] = 'image'
return soup
@@ -57,7 +57,6 @@ class Pagina12(BasicNewsRecipe):
else:
str = self.tag_to_string(item)
item.replaceWith(str)
-for item in soup.findAll('img'):
-if not item.has_key('alt'): # noqa
-item['alt'] = 'image'
+for item in soup.findAll('img', alt=False):
+item['alt'] = 'image'
return soup
@@ -64,7 +64,7 @@ class DeGentenaarOnline(BasicNewsRecipe):
del item['style']
for item in soup.findAll('span'):
item.name = 'div'
-if item.has_key('id') and item['id'] == 'lblArticleTitle': # noqa
+if item.get('id') == 'lblArticleTitle':
item.name = 'h3'

soup.html['lang'] = self.lang
@@ -65,8 +65,8 @@ class DeutscheWelle_bs(BasicNewsRecipe):
if limg:
item.name = 'div'
del item['href']
-if item.has_key('target'): # noqa
-del item['target']
+item['target'] = ''
+del item['target']
else:
str = self.tag_to_string(item)
item.replaceWith(str)
@@ -63,8 +63,8 @@ class DeutscheWelle_hr(BasicNewsRecipe):
if limg:
item.name = 'div'
del item['href']
-if item.has_key('target'): # noqa
-del item['target']
+item['target'] = ''
+del item['target']
else:
str = self.tag_to_string(item)
item.replaceWith(str)
@@ -54,8 +54,8 @@ class DeutscheWelle_pt(BasicNewsRecipe):
if limg:
item.name = 'div'
del item['href']
-if item.has_key('target'): # noqa
-del item['target']
+item['target'] = ''
+del item['target']
else:
str = self.tag_to_string(item)
item.replaceWith(str)
@@ -68,8 +68,8 @@ class DeutscheWelle_sr(BasicNewsRecipe):
if limg:
item.name = 'div'
del item['href']
-if item.has_key('target'): # noqa
-del item['target']
+item['target'] = ''
+del item['target']
else:
str = self.tag_to_string(item)
item.replaceWith(str)
@@ -55,8 +55,8 @@ class DnevnikCro(BasicNewsRecipe):
for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
item.name = 'div'
for attrib in attribs:
-if item.has_key(attrib): # noqa
-del item[attrib]
+item[attrib] = ''
+del item[attrib]

mlang = Tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
@@ -37,7 +37,6 @@ class DobaNevinosti(BasicNewsRecipe):
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
-for item in soup.findAll('img'):
-if not item.has_key('alt'): # noqa
-item['alt'] = 'image'
+for item in soup.findAll('img', alt=False):
+item['alt'] = 'image'
return soup
@@ -28,8 +28,8 @@ class Dobreprogramy_pl(BasicNewsRecipe):
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]

def preprocess_html(self, soup):
-for a in soup('a'):
-if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: # noqa
+for a in soup('a', href=True):
+if 'http://' not in a['href'] and 'https://' not in a['href']:
a['href'] = self.index + a['href']
for r in soup.findAll('iframe'):
r.parent.extract()
@@ -81,8 +81,8 @@ class Dzieje(BasicNewsRecipe):
return feeds

def preprocess_html(self, soup):
-for a in soup('a'):
-if a.has_key('href') and not a['href'].startswith('http'): # noqa
+for a in soup('a', href=True):
+if not a['href'].startswith('http'):
a['href'] = self.index + a['href']
self.append_page(soup, soup.body)
return soup
@@ -286,7 +286,7 @@ class CanWestPaper(BasicNewsRecipe):
else:
description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description)
-if not articles.has_key(key): # noqa
+if key not in articles:
articles[key] = []
articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content=''))
@@ -310,5 +310,5 @@ class CanWestPaper(BasicNewsRecipe):

for (k, url) in self.postmedia_index_pages:
parse_web_index(k, url)
-ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa
+ans = [(key, articles[key]) for key in ans if key in articles]
return ans
@@ -113,11 +113,11 @@ class ElDiplo_Recipe(BasicNewsRecipe):
if aut:
auth = self.tag_to_string(aut, use_alt=False).strip()

-if not articles.has_key(section): # noqa
+if section not in articles: # noqa
articles[section] = []

articles[section].append(dict(
title=title, author=auth, url=url, date=None, description=description, content=''))

-ans = [(s, articles[s]) for s in ans if articles.has_key(s)] # noqa
+ans = [(s, articles[s]) for s in ans if s in articles]
return ans
@@ -53,7 +53,6 @@ class ElClubDelEbook(BasicNewsRecipe):
else:
str = self.tag_to_string(item)
item.replaceWith(str)
-for item in soup.findAll('img'):
-if not item.has_key('alt'): # noqa
-item['alt'] = 'image'
+for item in soup.findAll('img', alt=False):
+item['alt'] = 'image'
return soup
@@ -35,17 +35,16 @@ class ElPaisSemanal(BasicNewsRecipe):
def parse_index(self):
articles = []
soup = self.index_to_soup(self.index)
-for item in soup.findAll('a', attrs={'class': ['g19i003', 'g17r003', 'g17i003']}):
+for item in soup.findAll('a', attrs={'class': ['g19i003', 'g17r003', 'g17i003']}, href=True):
description = ''
title_prefix = ''
feed_link = item
-if item.has_key('href'): # noqa
-url = 'http://www.elpais.com' + item['href'].rpartition('/')[0]
-title = title_prefix + self.tag_to_string(feed_link)
-date = strftime(self.timefmt)
-articles.append({
-'title': title, 'date': date, 'url': url, 'description': description
-})
+url = 'http://www.elpais.com' + item['href'].rpartition('/')[0]
+title = title_prefix + self.tag_to_string(feed_link)
+date = strftime(self.timefmt)
+articles.append({
+'title': title, 'date': date, 'url': url, 'description': description
+})
return [(soup.head.title.string, articles)]

def print_version(self, url):
@@ -31,7 +31,7 @@ class ElUniversalImpresaRecipe(BasicNewsRecipe):

table = soup.find('table', attrs={'width': '500'})
articles = []
-for td in table.findAll(lambda tag: tag.name == 'td' and tag.has_key('class') and tag['class'] == 'arnegro12'): # noqa
+for td in table.findAll('td', attrs={'class': 'arnegro12'}):
a = td.a
a.extract()
title = self.tag_to_string(a)
@@ -79,8 +79,8 @@ class ElUniversalImpresaRecipe(BasicNewsRecipe):
tag = soup.find('font', attrs={'color': '#0F046A'})
if tag:
for attr in ['color', 'face', 'helvetica,', 'sans-serif', 'size']:
-if tag.has_key(attr): # noqa
-del tag[attr]
+tag[attr] = ''
+del tag[attr]
tag.name = 'h1'

return soup
@@ -62,8 +62,8 @@ class ESPN(BasicNewsRecipe):
]

def preprocess_html(self, soup):
-for div in soup.findAll('div'):
-if div.has_key('style') and 'px' in div['style']: # noqa
+for div in soup.findAll('div', style=True):
+if 'px' in div['style']:
div['style'] = ''

return soup
@@ -88,7 +88,7 @@ class Estadao(BasicNewsRecipe):
def postprocess_html(self, soup, first):
# process all the images. assumes that the new html has the correct
# path
-for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')): # noqa
+for tag in soup.findAll('img', src=True):
iurl = tag['src']
img = Image()
img.open(iurl)
@@ -57,7 +57,6 @@ class FinancialSense(BasicNewsRecipe):
else:
str = self.tag_to_string(item)
item.replaceWith(str)
-for item in soup.findAll('img'):
-if not item.has_key('alt'): # noqa
-item['alt'] = 'image'
+for item in soup.findAll('img', alt=False):
+item['alt'] = 'image'
return soup
@@ -84,8 +84,8 @@ class General(BasicNewsRecipe):
for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
item.name = 'div'
for attrib in attribs:
-if item.has_key(attrib): # noqa
-del item[attrib]
+item[attrib] = ''
+del item[attrib]
return soup

def get_cover_url(self):
@@ -27,7 +27,7 @@ class Gameplay_pl(BasicNewsRecipe):
return url

def preprocess_html(self, soup):
-for a in soup('a'):
-if a.has_key('href') and '../' in a['href']: # noqa
+for a in soup('a', href=True):
+if '../' in a['href']:
a['href'] = self.index + a['href'][2:]
return soup
@@ -56,7 +56,7 @@ class AdvancedUserRecipe1307556816(BasicNewsRecipe):
extra_css = 'body, h3, p, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em}'

def postprocess_html(self, soup, first):
-for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')): # noqa
+for tag in soup.findAll('img', src=True):
iurl = tag['src']
img = Image()
img.open(iurl)
@@ -58,8 +58,8 @@ class Gildia(BasicNewsRecipe):

def preprocess_html(self, soup):
title = soup.title.renderContents().lower()
-for a in soup('a'):
-if a.has_key('href') and not a['href'].startswith('http'): # noqa
+for a in soup('a', href=True):
+if not a['href'].startswith('http'):
if '/gry/' in a['href']:
a['href'] = 'http://www.gry.gildia.pl' + a['href']
elif u'książk' in title or u'komiks' in title:
@@ -43,8 +43,8 @@ class Gram_pl(BasicNewsRecipe):
tag.p.img.extract()
tag.p.insert(len(tag.p.contents) - 2,
BeautifulSoup('<h2>Ocena: {0}</h2>'.format(rate)).h2)
-for a in soup('a'):
-if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: # noqa
+for a in soup.findAll('a', href=True):
+if 'http://' not in a['href'] and 'https://' not in a['href']: # noqa
a['href'] = self.index + a['href']
tag = soup.find(name='span', attrs={'class': 'platforma'})
if tag:
@@ -47,7 +47,7 @@ class in4(BasicNewsRecipe):

def preprocess_html(self, soup):
self.append_page(soup, soup.body)
-for a in soup('a'):
-if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: # noqa
+for a in soup.findAll('a', href=True):
+if 'http://' not in a['href'] and 'https://' not in a['href']:
a['href'] = self.index + a['href']
return soup
@@ -21,7 +21,7 @@ class INFRA(BasicNewsRecipe):
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
-for a in soup('a'):
-if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: # noqa
+for a in soup.findAll('a', href=True):
+if 'http://' not in a['href'] and 'https://' not in a['href']:
a['href'] = self.index + a['href']
return soup
@@ -65,8 +65,8 @@ class Jutarnji(BasicNewsRecipe):
for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
item.name = 'div'
for attrib in attribs:
-if item.has_key(attrib): # noqa
-del item[attrib]
+item[attrib] = ''
+del item[attrib]

mlang = Tag(soup, 'meta', [
("http-equiv", "Content-Language"), ("content", self.lang)])
@@ -47,7 +47,7 @@ class AdvancedUserRecipe1299694372(BasicNewsRecipe):
soup = self.index_to_soup(feedurl)
for item in soup.findAll('table', attrs={'class': ['item', 'item new']}):
atag = item.a
-if atag and atag.has_key('href'): # noqa
+if atag and atag.get('href') is not None:
url = atag['href']
articles.append({
'url': url
@@ -61,17 +61,18 @@ class LeMondeDiplomatiqueEn(BasicNewsRecipe):
cnt = soup.find('div', attrs={'class': 'som_num'})
for item in cnt.findAll('li'):
description = ''
-feed_link = item.find('a')
+feed_link = item.find('a', href=True)
+if feed_link is None:
+continue
desc = item.find('div', attrs={'class': 'chapo'})
if desc:
description = desc.string
-if feed_link and feed_link.has_key('href'): # noqa
-url = self.PREFIX + feed_link['href'].partition('/../')[2]
-title = self.tag_to_string(feed_link)
-date = strftime(self.timefmt)
-articles.append({
-'title': title, 'date': date, 'url': url, 'description': description
-})
+url = self.PREFIX + feed_link['href'].partition('/../')[2]
+title = self.tag_to_string(feed_link)
+date = strftime(self.timefmt)
+articles.append({
+'title': title, 'date': date, 'url': url, 'description': description
+})
return [(self.title, articles)]

def get_cover_url(self):
@@ -60,14 +60,9 @@ class LentaRURecipe(BasicNewsRecipe):
if not feedData:
raise NotImplementedError
self.log("parse_index: Feed loaded successfully.")
-if feedData.feed.has_key('title'): # noqa
-self.title = feedData.feed.title
-self.log("parse_index: Title updated to: ", self.title)
-if feedData.feed.has_key('image'): # noqa
-self.log("HAS IMAGE!!!!")

def get_virtual_feed_articles(feed):
-if feeds.has_key(feed): # noqa
+if feed in feeds:
return feeds[feed][1]
self.log("Adding new feed: ", feed)
articles = []
@@ -84,7 +79,7 @@ class LentaRURecipe(BasicNewsRecipe):
continue
article = {'title': title, 'url': link, 'description': item.get(
'description', ''), 'date': item.get('date', ''), 'content': ''}
-if not item.has_key('tags'): # noqa
+if not item.get('tags'):
get_virtual_feed_articles('_default').append(article)
continue
for tag in item.tags:
@@ -101,7 +96,7 @@ class LentaRURecipe(BasicNewsRecipe):
# Select sorted feeds first of all
result = []
for feedName in self.sortOrder:
-if (not feeds.has_key(feedName)): # noqa
+if (not feeds.get(feedName)):
continue
result.append(feeds[feedName])
del feeds[feedName]
@@ -45,7 +45,7 @@ class LondonReviewOfBooksPayed(BasicNewsRecipe):
soup = self.index_to_soup(self.INDEX)
cover_item = soup.find('p', attrs={'class': 'cover'})
dates = str(soup.find('span', attrs={'class': 'coverdate'}))
-newdates = re.sub('\<.*\>', '', re.split('<br />', dates)[1])
+newdates = re.sub(r'\<.*\>', '', re.split('<br />', dates)[1])
self.timefmt = ' [%s]' % newdates
lrbtitle = self.title
if cover_item:
@@ -58,13 +58,13 @@ class LondonReviewOfBooksPayed(BasicNewsRecipe):
description = u''
title_prefix = u''
feed_link = item
-if feed_link.has_key('href'): # noqa
+if feed_link.get('href'):
url = self.INDEX + feed_link['href']
title_link = re.split('<br />', str(feed_link))
if len(title_link) > 1:
title = title_prefix + \
re.sub(
-'\<.*\>', '', title_link[0]) + ' - ' + re.sub('\<.*\>', '', title_link[1])
+r'\<.*\>', '', title_link[0]) + ' - ' + re.sub(r'\<.*\>', '', title_link[1])
else:
title = title_prefix + self.tag_to_string(feed_link)
desc = item.findNext('li')
@@ -133,7 +133,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
class MerryPreProcess():

def optimizePicture(self, soup):
-for tag in soup.findAll(lambda tag: tag.name.lower() == 'img' and tag.has_key('src')): # noqa
+for tag in soup.findAll('img', src=True):
try:
iurl = tag['src']
img = Image()
@@ -42,11 +42,10 @@ class MoneyControlRecipe(BasicNewsRecipe):
h1.append(self.tag_to_string(headline))
freshSoup.body.append(h1)

-for p in soup.findAll('p'):
-if p.has_key('class'):
-if p['class'] == 'MsoNormal':
-# We have some weird pagebreak marker here; it will not find all of them however
-continue
+for p in soup.findAll('p', attrs={'class': true}):
+if p['class'] == 'MsoNormal':
+# We have some weird pagebreak marker here; it will not find all of them however
+continue

para = Tag(freshSoup, 'p')
# Convert to string; this will loose all formatting but also all illegal markup
@@ -286,7 +286,7 @@ class CanWestPaper(BasicNewsRecipe):
else:
description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description)
-if not articles.has_key(key): # noqa
+if key not in articles:
articles[key] = []
articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content=''))
@@ -310,5 +310,5 @@ class CanWestPaper(BasicNewsRecipe):

for (k, url) in self.postmedia_index_pages:
parse_web_index(k, url)
-ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa
+ans = [(key, articles[key]) for key in ans if key in articles]
return ans
@@ -38,7 +38,7 @@ class naszdziennik(BasicNewsRecipe):
section = self.tag_to_string(section)
# sprawdzamy czy w słowniku artykułów istnieje klucz dotyczący sekcji
# jeśli nie istnieje to :
-if not articles.has_key(section): # noqa
+if section not in articles:
# do listy sekcji dodajemy nową sekcje
sections.append(section)
# deklarujemy nową sekcje w słowniku artykułów przypisując jej
@@ -77,7 +77,7 @@ class NrcNextRecipe(BasicNewsRecipe):
# Add the article to a temporary list
article = {'title': completeTitle, 'date': u'',
'url': href, 'description': '<p> </p>'}
-if not articles.has_key(index): # noqa
+if index not in articles:
articles[index] = []
articles[index].append(article)

@@ -90,7 +90,7 @@ class NrcNextRecipe(BasicNewsRecipe):
indices, {u'columnisten': 1, u'koken': 3, u'geld & werk': 2, u'vandaag': 0})
# Apply this sort order to the actual list of feeds and articles
answer = [(key, articles[key])
-for key in indices if articles.has_key(key)] # noqa
+for key in indices if key in articles]

return answer
@@ -201,8 +201,7 @@ class Newsweek(BasicNewsRecipe):
self.DATE = matches.group(0)

# cover
-img = main_section.find(lambda tag: tag.name == 'img' and tag.has_key( # noqa
-'alt') and tag.has_key('title'))
+img = main_section.find('img', src=True, alt=True, title=True)
self.cover_url = img['src']
feeds = []
articles = {}
@@ -233,7 +232,7 @@ class Newsweek(BasicNewsRecipe):
if article is None:
continue

-if articles.has_key(section): # noqa
+if section in articles:
articles[section].append(article)
else:
articles[section] = [article]
@@ -82,7 +82,6 @@ class Novosti(BasicNewsRecipe):
else:
str = self.tag_to_string(item)
item.replaceWith(str)
-for item in soup.findAll('img'):
-if not item.has_key('alt'): # noqa
-item['alt'] = 'image'
+for item in soup.findAll('img', alt=False):
+item['alt'] = 'image'
return soup
@@ -55,8 +55,8 @@ class NotSafeForWork(BasicNewsRecipe):
def get_feeds(self):
self.feeds = []
soup = self.index_to_soup(self.SETTINGS)
-for item in soup.findAll('input', attrs={'type': 'text'}):
-if item.has_key('value') and item['value'].startswith('https://www.nsfwcorp.com/feed/'): # noqa
+for item in soup.findAll('input', value=True, attrs={'type': 'text'}):
+if item['value'].startswith('https://www.nsfwcorp.com/feed/'):
self.feeds.append(item['value'])
return self.feeds
return self.feeds
@@ -109,7 +109,6 @@ class Nspm(BasicNewsRecipe):
else:
str = self.tag_to_string(item)
item.replaceWith(str)
-for item in soup.findAll('img'):
-if not item.has_key('alt'): # noqa
-item['alt'] = 'image'
+for item in soup.findAll('img', alt=False):
+item['alt'] = 'image'
return soup
@@ -286,7 +286,7 @@ class CanWestPaper(BasicNewsRecipe):
else:
description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description)
-if not articles.has_key(key): # noqa
+if key not in articles:
articles[key] = []
articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content=''))
@@ -310,5 +310,5 @@ class CanWestPaper(BasicNewsRecipe):

for (k, url) in self.postmedia_index_pages:
parse_web_index(k, url)
-ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa
+ans = [(key, articles[key]) for key in ans if key in articles]
return ans
@@ -86,8 +86,8 @@ class Pobjeda(BasicNewsRecipe):
soup = self.index_to_soup(feedurl)
for item in soup.findAll('div', attrs={'class': 'vijest'}):
description = self.tag_to_string(item.h2)
-atag = item.h1.find('a')
-if atag and atag.has_key('href'): # noqa
+atag = item.h1.find('a', href=True)
+if atag is not None:
url = self.INDEX + '/' + atag['href']
title = self.tag_to_string(atag)
date = strftime(self.timefmt)
@@ -58,10 +58,9 @@ class Politika(BasicNewsRecipe):
del item['style']
for item in soup.findAll('a', attrs={'class': 'category'}):
item.name = 'span'
-if item.has_key('href'): # noqa
-del item['href']
-if item.has_key('title'): # noqa
-del item['title']
+item['href'] = item['title'] = ''
+del item['href']
+del item['title']
return soup

def get_cover_url(self):
@@ -51,7 +51,7 @@ class Polityka(BasicNewsRecipe):
'http://archiwum.polityka.pl' + div.a['href'],)
section = self.tag_to_string(article_page.find(
'h2', attrs={'class': 'box_nag'})).split('/')[0].lstrip().rstrip()
-if not articles.has_key(section): # noqa
+if section not in articles:
articles[section] = []
articles[section].append({
'title': self.tag_to_string(div.a),
@@ -190,7 +190,7 @@ class CanWestPaper(BasicNewsRecipe):
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
if divtag['class'].startswith('section_title'):
-# div contains section title
+# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3, False)
@@ -215,11 +215,11 @@ class CanWestPaper(BasicNewsRecipe):
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag, False)
-if not articles.has_key(key): # noqa
+if key not in articles:
articles[key] = []
articles[key].append(dict(title=title, url=url, date=pubdate,
description=description, author=author, content=''))

ans = [(keyl, articles[keyl])
-for keyl in ans if articles.has_key(keyl)] # noqa
+for keyl in ans if keyl in articles]
return ans
@@ -46,8 +46,8 @@ class Republika(BasicNewsRecipe):
for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
item.name = 'div'
for attrib in attribs:
-if item.has_key(attrib): # noqa
-del item[attrib]
+item[attrib] = ''
+del item[attrib]
return soup

def parse_index(self):
@@ -190,7 +190,7 @@ class CanWestPaper(BasicNewsRecipe):
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
if divtag['class'].startswith('section_title'):
-# div contains section title
+# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3, False)
@@ -215,10 +215,10 @@ class CanWestPaper(BasicNewsRecipe):
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag, False)
-if not articles.has_key(key): # noqa
+if key not in articles:
articles[key] = []
articles[key].append(dict(title=title, url=url, date=pubdate,
description=description, author=author, content=''))

-ans = [(k, articles[k]) for k in ans if articles.has_key(k)] # noqa
+ans = [(k, articles[k]) for k in ans if k in articles]
return ans
@@ -45,11 +45,11 @@ class SCPrintMagazine(BasicNewsRecipe):
if arttitlet is not None:
mylink = arttitlet.find('a')
if mylink is not None:
-if mylink.has_key('title'): # noqa
+if mylink.get('title'):
arttitle = mylink['title']
else:
arttitle = 'unknown'
-if mylink.has_key('href'): # noqa
+if mylink.get('href'):
artlink = mylink['href']
artlink = artlink.replace(
"/article", "/printarticle")
@@ -81,7 +81,7 @@ class SZmobil(BasicNewsRecipe):
if itt['href'].startswith('article.php?id='):
article_url = itt['href']
article_id = int(
-re.search("id=(\d*)&etag=", itt['href']).group(1))
+re.search(r"id=(\d*)&etag=", itt['href']).group(1))

# first check if link is a special article in section
# "Meinungsseite"
@@ -104,7 +104,7 @@ class SZmobil(BasicNewsRecipe):
# just another link ("mehr") to an article
continue

-if itt.has_key('id'): # noqa
+if itt.get('id') is not None:
shorttitles[article_id] = article_name
else:
articles.append(
@@ -118,7 +118,7 @@ class SZmobil(BasicNewsRecipe):
# pubdate = strftime('')
pubdate = strftime('[%a, %d %b]')
description = ''
-if shorttitles.has_key(article_id): # noqa
+if shorttitles.get(article_id) is not None:
description = shorttitles[article_id]
# we do not want the flag ("Impressum")
if "HERAUSGEGEBEN VOM" in description:
@@ -55,7 +55,6 @@ class TechCrunch(BasicNewsRecipe):
else:
str = self.tag_to_string(item)
item.replaceWith(str)
-for item in soup.findAll('img'):
-if not item.has_key('alt'): # noqa
-item['alt'] = 'image'
+for item in soup.findAll('img', alt=False):
+item['alt'] = 'image'
return soup
@@ -45,7 +45,7 @@ class TheAge(BasicNewsRecipe):

# Make sure to skip: <a href="/">TheAge</a>

-elif section and tag.has_key('href') and len(tag['href'].strip()) > 1: # noqa
+elif section and tag.get('href'):
url = tag['href'].strip()
if url.startswith('/'):
url = 'http://www.theage.com.au' + url
@@ -105,7 +105,7 @@ class TheAge(BasicNewsRecipe):

# Filter out what's left of the text-mode navigation stuff

-if re.match('((\s)|(\ \;))*\[[\|\s*]*\]((\s)|(\ \;))*$', contents):
+if re.match(r'((\s)|(\ \;))*\[[\|\s*]*\]((\s)|(\ \;))*$', contents):
p.extract()
continue

@@ -47,6 +47,6 @@ class Edgesingapore(BasicNewsRecipe):
for item in soup.body.findAll(name=['table', 'td', 'tr', 'th', 'caption', 'thead', 'tfoot', 'tbody', 'colgroup', 'col']):
item.name = 'div'
for attrib in attribs:
-if item.has_key(attrib): # noqa
-del item[attrib]
+item[attrib] = ''
+del item[attrib]
return self.adeify_images(soup)
@@ -78,12 +78,12 @@ class TheOnion(BasicNewsRecipe):
if limg:
item.name = 'div'
item.attrs = []
-if not limg.has_key('alt'): # noqa
+if not limg.get('alt'):
limg['alt'] = 'image'
else:
str = self.tag_to_string(item)
item.replaceWith(str)
for item in soup.findAll('img'):
-if item.has_key('data-src'): # noqa
+if item.get('data-src'):
item['src'] = item['data-src']
return soup
@@ -57,8 +57,8 @@ class Tomshardware(BasicNewsRecipe):
def cleanup_image_tags(self, soup):
for item in soup.findAll('img'):
for attrib in ['height', 'width', 'border', 'align']:
-if item.has_key(attrib): # noqa
-del item[attrib]
+item[attrib] = ''
+del item[attrib]
return soup

def preprocess_html(self, soup):
@@ -45,7 +45,6 @@ class Twitchfilm(BasicNewsRecipe):
else:
str = self.tag_to_string(item)
item.replaceWith(str)
-for item in soup.findAll('img'):
-if not item.has_key('alt'): # noqa
-item['alt'] = 'image'
+for item in soup.findAll('img', alt=False):
+item['alt'] = 'image'
return soup
@@ -299,7 +299,7 @@ class CanWestPaper(BasicNewsRecipe):
else:
description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description)
-if not articles.has_key(key): # noqa
+if key not in articles:
articles[key] = []
articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content=''))
@@ -323,5 +323,5 @@ class CanWestPaper(BasicNewsRecipe):

for (k, url) in self.postmedia_index_pages:
parse_web_index(k, url)
-ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa
+ans = [(key, articles[key]) for key in ans if key in articles]
return ans
@@ -287,7 +287,7 @@ class CanWestPaper(BasicNewsRecipe):
else:
description = self.tag_to_string(dtag, False)
print("DESCRIPTION: " + description)
-if not articles.has_key(key): # noqa
+if key not in articles:
articles[key] = []
articles[key].append(dict(
title=title, url=url, date='', description=description, author='', content=''))
@@ -311,5 +311,5 @@ class CanWestPaper(BasicNewsRecipe):

for (k, url) in self.postmedia_index_pages:
parse_web_index(k, url)
-ans = [(key, articles[key]) for key in ans if articles.has_key(key)] # noqa
+ans = [(key, articles[key]) for key in ans if key in articles] # noqa
return ans
@@ -62,7 +62,6 @@ class Variety(BasicNewsRecipe):
else:
str = self.tag_to_string(item)
item.replaceWith(str)
-for item in soup.findAll('img'):
-if not item.has_key('alt'): # noqa
-item['alt'] = 'image'
+for item in soup.findAll('img', alt=False):
+item['alt'] = 'image'
return soup
@@ -62,16 +62,22 @@ class VedomostiRecipe(BasicNewsRecipe):
if not feedData:
raise NotImplementedError
self.log("parse_index: Feed loaded successfully.")
-if feedData.feed.has_key('title'): # noqa
-self.title = feedData.feed.title
-self.log("parse_index: Title updated to: ", self.title)
-if feedData.feed.has_key('description'): # noqa
-self.description = feedData.feed.description
-self.log("parse_index: Description updated to: ",
-self.description)
+try:
+if feedData.feed.title:
+self.title = feedData.feed.title
+self.log("parse_index: Title updated to: ", self.title)
+except Exception:
+pass
+try:
+if feedData.feed.description:
+self.description = feedData.feed.description
+self.log("parse_index: Description updated to: ",
+self.description)
+except Exception:
+pass

def get_virtual_feed_articles(feed):
-if feeds.has_key(feed): # noqa
+if feed in feeds:
return feeds[feed][1]
self.log("Adding new feed: ", feed)
articles = []
@@ -88,7 +94,7 @@ class VedomostiRecipe(BasicNewsRecipe):
continue
article = {'title': title, 'url': link, 'description': item.get(
'description', ''), 'date': item.get('date', ''), 'content': ''}
-if not item.has_key('tags'): # noqa
+if not item.get('tags'): # noqa
get_virtual_feed_articles('_default').append(article)
continue
for tag in item.tags:
@@ -105,7 +111,7 @@ class VedomostiRecipe(BasicNewsRecipe):
# Select sorted feeds first of all
result = []
for feedName in self.sortOrder:
-if (not feeds.has_key(feedName)): # noqa
+if (not feeds.get(feedName)):
continue
result.append(feeds[feedName])
del feeds[feedName]
@@ -142,9 +148,9 @@ class VedomostiRecipe(BasicNewsRecipe):
imgDiv = Tag(soup, 'div')
imgDiv['class'] = 'article_img'

-if img.has_key('width'): # noqa
+if img.get('width'):
del(img['width'])
-if img.has_key('height'): # noqa
+if img.get('height'):
del(img['height'])

# find description
@@ -180,11 +186,9 @@ class VedomostiRecipe(BasicNewsRecipe):
contents.insert(len(contents.contents), authorsP)

# Fix urls that use relative path
-urls = contents.findAll('a')
+urls = contents.findAll('a', href=True)
if urls:
for url in urls:
-if not url.has_key('href'): # noqa
-continue
if '/' == url['href'][0]:
url['href'] = self.base_url + url['href']

@@ -94,8 +94,8 @@ class WaPoCartoonsRecipe(BasicNewsRecipe):
img = soup.find('img', attrs={'class': 'pic_big'})
if img:
td = img.parent
-if td.has_key('style'): # noqa
-del td['style']
+td['style'] = ''
+del td['style']
td.name = 'div'
td['id'] = 'comic_full'
freshSoup.body.append(td)
@@ -134,11 +134,8 @@ class WaPoCartoonsRecipe(BasicNewsRecipe):
'June': '06', 'July': '07', 'August': '08', 'September': '09', 'October': '10',
'November': '11', 'December': '12'}

-opts = select.findAll('option')
+opts = select.findAll('option', selected=False)
for i in range(1, len(opts)):
-if opts[i].has_key('selected'): # noqa
-continue

dateString = self.tag_to_string(opts[i])
rest, sep, year = dateString.rpartition(', ')
parts = rest.split(' ')
@@ -42,11 +42,11 @@ class TheCND(BasicNewsRecipe):
url = 'http://bbs.wenxuecity.com' + url
title = self.tag_to_string(a)
self.log('\tFound article: ', title, ' at:', url)
-dateReg = re.search('(\d\d?)/(\d\d?)/(\d\d)',
+dateReg = re.search(r'(\d\d?)/(\d\d?)/(\d\d)',
self.tag_to_string(a.parent))
date = '%(y)s/%(m)02d/%(d)02d' % {'y': dateReg.group(3),
'm': int(dateReg.group(1)), 'd': int(dateReg.group(2))}
-if not articles.has_key(date): # noqa
+if date not in articles: # noqa
articles[date] = []
articles[date].append(
{'title': title, 'url': url, 'description': '', 'date': ''})
@@ -96,10 +96,10 @@ class CanWestPaper(BasicNewsRecipe):
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag, False)
-if not articles.has_key(key): # noqa
+if key not in articles:
articles[key] = []
articles[key].append(dict(title=title, url=url, date=pubdate,
description=description, author=author, content=''))

-ans = [(keyl, articles[key]) for keyl in ans if articles.has_key(keyl)] # noqa
+ans = [(keyl, articles[key]) for keyl in ans if keyl in articles]
return ans
@@ -189,9 +189,9 @@ class CanWestPaper(BasicNewsRecipe):

# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
-# self.log(" div class = %s" % divtag['class'])
+# self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
-# div contains section title
+# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3, False)
@@ -221,11 +221,11 @@ class CanWestPaper(BasicNewsRecipe):
if autag:
author = self.tag_to_string(autag, False)
# self.log("author %s" % author)
-if not articles.has_key(key): # noqa
+if key not in articles:
articles[key] = []
articles[key].append(dict(title=title, url=url, date=pubdate,
description=description, author=author, content=''))

ans = [(keyl, articles[keyl])
-for keyl in ans if articles.has_key(keyl)] # noqa
+for keyl in ans if keyl in articles]
return ans
@@ -83,11 +83,10 @@ class ZAOBAO(BasicNewsRecipe):
]

def preprocess_html(self, soup):
-for tag in soup.findAll(name='a'):
-if tag.has_key('href'): # noqa
-tag_url = tag['href']
-if tag_url.find('http://') != -1 and tag_url.find('zaobao.com') == -1:
-del tag['href']
+for tag in soup.findAll(name='a', href=True):
+tag_url = tag['href']
+if tag_url.find('http://') != -1 and tag_url.find('zaobao.com') == -1:
+del tag['href']
return soup

def postprocess_html(self, soup, first):
@@ -107,8 +106,8 @@ class ZAOBAO(BasicNewsRecipe):
for i, item in enumerate(soup.findAll('li')):
if i >= self.MAX_ITEMS_IN_INDEX:
break
-a = item.find('a')
-if a and a.has_key('href'): # noqa
+a = item.find('a', href=True)
+if a is not None:
a_url = a['href']
a_title = self.tag_to_string(a)
date = ''