This commit is contained in:
Kovid Goyal 2014-11-27 13:11:54 +05:30
parent b524709809
commit e9c2e24155

View File

@ -82,31 +82,31 @@ class RealClear(BasicNewsRecipe):
return pageURL
if len(self.printhints[x][self.phHrefSearch])>0 and len(self.printhints[x][self.phLinkText]) == 0:
# e.g. RealClear
if self.debugMessages == True :
if self.debugMessages is True :
print("Search by href: "+self.printhints[x][self.phHrefSearch])
printFind = soup.find(href=re.compile(self.printhints[x][self.phHrefSearch]))
elif len(self.printhints[x][3])>0 and len(self.printhints[x][1]) == 0:
if self.debugMessages == True :
if self.debugMessages is True :
print("Search 1: "+self.printhints[x][2]+" Attributes: ")
print(self.printhints[x][3])
printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3])
elif len(self.printhints[x][3])>0 :
if self.debugMessages == True :
if self.debugMessages is True :
print("search2")
printFind = soup.find(self.printhints[x][2], attrs=self.printhints[x][3], text=self.printhints[x][1])
else :
if self.debugMessages == True:
if self.debugMessages is True:
print("Default Search: "+self.printhints[x][2]+" Text: "+self.printhints[x][1])
printFind = soup.find(self.printhints[x][2], text=self.printhints[x][1])
if printFind is None:
if self.debugMessages == True :
if self.debugMessages is True :
print("Not Found")
# print(soup)
print("end soup\n\n");
print("end soup\n\n")
continue
print(printFind)
if isinstance(printFind, NavigableString)==False:
if isinstance(printFind, NavigableString) is False:
if printFind['href'] is not None:
print("Check "+printFind['href']+" for base of "+baseURL)
if printFind['href'].find("http")!=0 :
@ -115,24 +115,24 @@ class RealClear(BasicNewsRecipe):
tag = printFind.parent
print(tag)
if tag['href'] is None:
if self.debugMessages == True :
if self.debugMessages is True :
print("Not in parent, trying skip-up")
if tag.parent['href'] is None:
if self.debugMessages == True :
if self.debugMessages is True :
print("Not in skip either, aborting")
continue;
continue
return tag.parent['href']
return tag['href']
return tagURL
def get_browser(self):
if self.debugMessages == True :
if self.debugMessages is True :
print("In get_browser")
br = BasicNewsRecipe.get_browser(self)
return br
def parseRSS(self, index) :
if self.debugMessages == True :
if self.debugMessages is True :
print("\n\nStarting "+self.feedsets[index][0])
articleList = []
soup = self.index_to_soup(self.feedsets[index][1])
@ -164,8 +164,8 @@ class RealClear(BasicNewsRecipe):
pubDate = time.strftime('%a, %d %b')
else :
pubDate = pubDateEl.contents[0]
if self.debugMessages == True :
print("Article");
if self.debugMessages is True :
print("Article")
print(title)
print(description)
print(pubDate)
@ -197,8 +197,6 @@ class RealClear(BasicNewsRecipe):
feedarticles = self.parseRSS(x)
if feedarticles is not None:
ans.append((self.feedsets[x][0], feedarticles))
if self.debugMessages == True :
if self.debugMessages is True :
print(ans)
return ans