Mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-07-09 03:04:10 -04:00
Update NYTimes recipe
This commit is contained in:
parent a3ee07a2da
commit 3fd23ceadd
@@ -41,7 +41,7 @@ class NYTimes(BasicNewsRecipe):
     # number of days old an article can be for inclusion. If oldest_web_article = None all articles
     # will be included. Note: oldest_web_article is ignored if webEdition = False
     webEdition = False
-    oldest_web_article = 7
+    oldest_web_article = None
 
     # download higher resolution images than the small thumbnails typically included in the article
     # the down side of having large beautiful images is the file size is much larger, on the order of 7MB per paper
@@ -188,6 +188,8 @@ class NYTimes(BasicNewsRecipe):
         'relatedSearchesModule',
         'side_tool',
         'singleAd',
+        'postCategory column',
+        'refer tagRefer', # added for bits blog post
         'entry entry-utility', #added for DealBook
         'entry-tags', #added for DealBook
         'footer promos clearfix', #added for DealBook
@@ -324,6 +326,8 @@ class NYTimes(BasicNewsRecipe):
             return True
         if '/video/' in url:
             return True
+        if '/multimedia/' in url:
+            return True
         if '/slideshow/' in url:
             return True
         if '/magazine/index' in url:
@@ -334,6 +338,15 @@ class NYTimes(BasicNewsRecipe):
             return True
         if '/premium/' in url:
             return True
+        if '#comment' in url:
+            return True
+        if '#postComment' in url:
+            return True
+        if '#postcomment' in url:
+            return True
+        if re.search('/\d\d\d\d/\d\d/\d\d/',url) is None:
+            print("NO DATE IN "+url)
+            return True
         return False
 
     def fixChars(self,string):
@@ -363,6 +376,7 @@ class NYTimes(BasicNewsRecipe):
 
     cover_tag = 'NY_NYT'
     def get_cover_url(self):
+        from datetime import timedelta, date
         cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
         br = BasicNewsRecipe.get_browser(self)
         daysback=1
@@ -385,7 +399,6 @@ class NYTimes(BasicNewsRecipe):
 
     masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
 
-
     def short_title(self):
         return self.title
 
@@ -647,75 +660,53 @@ class NYTimes(BasicNewsRecipe):
 
         soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
 
-        # Fetch the content table
-        content_table = soup.find('table',{'id':'content'})
-        if content_table is None:
-            self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
-            return None
-
-        # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections
-
-        for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
-            for div_sec in td_col.findAll('div',recursive=False):
-                for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
-
-                    section_name = self.tag_to_string(h6_sec_name,use_alt=False)
-                    section_name = re.sub(r'^ *$','',section_name)
-
-                    if section_name == '':
-                        continue
-                    if self.includeSections != []:
-                        if section_name not in self.includeSections:
-                            print "SECTION NOT INCLUDED: ",section_name
-                            continue
-                    if section_name in self.excludeSections:
-                        print "SECTION EXCLUDED: ",section_name
-                        continue
-
-                    section_name=string.capwords(section_name)
-                    section_name = section_name.replace('Op-ed','Op-Ed')
-                    section_name = section_name.replace('U.s.','U.S.')
-                    section_name = section_name.replace('N.y.','N.Y.')
-                    pubdate = strftime('%a, %d %b')
-
-                    search_div = div_sec
-                    for next_tag in h6_sec_name.findNextSiblings(True):
-                        if next_tag.__class__.__name__ == 'Tag':
-                            if next_tag.name == 'div':
-                                search_div = next_tag
-                            break
-
-                    # Get the articles
-                    for h3_item in search_div.findAll('h3'):
-                        byline = h3_item.h6
-                        if byline is not None:
-                            author = self.tag_to_string(byline,use_alt=False)
-                        else:
-                            author = ''
-                        a = h3_item.find('a', href=True)
-                        if not a:
-                            continue
-                        url = re.sub(r'\?.*', '', a['href'])
-                        if self.exclude_url(url):
-                            continue
-                        url += '?pagewanted=all'
-                        if self.filterDuplicates:
-                            if url in self.url_list:
-                                continue
-                        self.url_list.append(url)
-                        title = self.tag_to_string(a, use_alt=True).strip()
-                        desc = h3_item.find('p')
-                        if desc is not None:
-                            description = self.tag_to_string(desc,use_alt=False)
-                        else:
-                            description = ''
-                        if not self.articles.has_key(section_name):
-                            self.ans.append(section_name)
-                            self.articles[section_name] = []
-                        self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
+        section_name='Unknown Section'
+        pubdate = strftime('%a, %d %b')
+        for td_col in soup.findAll('td'):
+            h6_sec_name = td_col.find('h6')
+            if h6_sec_name is not None:
+                new_section_name = self.tag_to_string(h6_sec_name,use_alt=False)
+                new_section_name = re.sub(r'^ *$','',new_section_name)
+                if new_section_name == '':
+                    continue
+                section_name = new_section_name
+                continue
+            atag = td_col.find('a')
+            if atag is not None:
+                h4tag = None
+                for h4tag in atag.findNextSiblings('h4'):
+                    break
+                if h4tag is None:
+                    continue
+                author = self.tag_to_string(h4tag,use_alt=False)
+                try:
+                    url = re.sub(r'\?.*', '', atag['href'])
+                except:
+                    continue
+                if self.exclude_url(url):
+                    continue
+                if '?' in url:
+                    url += '&pagewanted=all'
+                else:
+                    url += '?pagewanted=all'
+                if self.filterDuplicates:
+                    if url in self.url_list:
+                        continue
+                self.url_list.append(url)
+                title = self.tag_to_string(atag, use_alt=False).strip()
+                desc = atag.parent.find('p')
+                if desc is not None:
+                    description = self.tag_to_string(desc,use_alt=False)
+                else:
+                    description = ''
+                if not self.articles.has_key(section_name):
+                    self.ans.append(section_name)
+                    self.articles[section_name] = []
+                print('Title '+title+' author '+author)
+                self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
 
         self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
-        return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
+        return self.filter_ans(self.ans)
 
     def parse_index(self):
         if self.headlinesOnly:
@@ -825,8 +816,9 @@ class NYTimes(BasicNewsRecipe):
         for divr in soup.findAll('div',attrs={'class':re.compile('w190 right')}):
             if divr.find(text=re.compile('Sign up')):
                 divr.extract()
-        divr = soup.find('div',attrs={'id':re.compile('related-content')})
+        divr = soup.find('div',attrs={'class':re.compile('^relatedArticlesModule')})
         if divr is not None:
+            print("PROCESSING RELATED: "+self.tag_to_string(soup.title,False))
             # handle related articles
             rlist = []
             ul = divr.find('ul')
@@ -856,6 +848,8 @@ class NYTimes(BasicNewsRecipe):
                 asidediv.append(Tag(soup,'hr'))
                 smain = soup.find('body')
                 smain.append(asidediv)
+        else:
+            print("CANNOT FIND RELATED: "+self.tag_to_string(soup.title,False))
         for atag in soup.findAll('a'):
             img = atag.find('img')
             if img is not None:
@@ -898,6 +892,18 @@ class NYTimes(BasicNewsRecipe):
                         first_outer = outerdiv
                 else:
                     litag.extract()
+            for h6tag in rdiv.findAll('h6'):
+                if h6tag.find('a') is not None:
+                    if h6tag.find('a')['href'].startswith('http://www.nytimes.com'):
+                        url = re.sub(r'\?.*', '', h6tag.find('a')['href'])
+                        h6tag.find('a')['href'] = url+'?pagewanted=all'
+                        h6tag.extract()
+                        related.append(h6tag)
+                        if first_related is None:
+                            first_related = rdiv
+                            first_outer = outerdiv
+                    else:
+                        h6tag.extract()
             if related != []:
                 for r in related:
                     if r.h6: # don't want the anchor inside a h6 tag
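For reference, the added exclude_url() checks above can be read as a standalone predicate. Below is a minimal sketch, with a hypothetical helper name, that applies the same substring and regex tests the diff introduces and nothing more:

import re

def looks_excludable(url):
    # multimedia pages and comment anchors are now skipped
    if '/multimedia/' in url:
        return True
    for frag in ('#comment', '#postComment', '#postcomment'):
        if frag in url:
            return True
    # anything without a /YYYY/MM/DD/ date in its path is skipped as well
    if re.search(r'/\d\d\d\d/\d\d/\d\d/', url) is None:
        print("NO DATE IN "+url)
        return True
    return False

print(looks_excludable('http://www.nytimes.com/pages/todaysheadlines/'))        # True: no date in the path
print(looks_excludable('http://www.nytimes.com/2013/02/19/world/example.html')) # False (illustrative URL)

The hunks below repeat the same changes at shifted line numbers, evidently against a second copy of the NYTimes recipe in the repository.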
@@ -188,6 +188,8 @@ class NYTimes(BasicNewsRecipe):
         'relatedSearchesModule',
         'side_tool',
         'singleAd',
+        'postCategory column',
+        'refer tagRefer', # added for bits blog post
         'entry entry-utility', #added for DealBook
         'entry-tags', #added for DealBook
         'footer promos clearfix', #added for DealBook
@@ -324,6 +326,8 @@ class NYTimes(BasicNewsRecipe):
             return True
         if '/video/' in url:
             return True
+        if '/multimedia/' in url:
+            return True
         if '/slideshow/' in url:
             return True
         if '/magazine/index' in url:
@@ -334,6 +338,15 @@ class NYTimes(BasicNewsRecipe):
             return True
         if '/premium/' in url:
             return True
+        if '#comment' in url:
+            return True
+        if '#postComment' in url:
+            return True
+        if '#postcomment' in url:
+            return True
+        if re.search('/\d\d\d\d/\d\d/\d\d/',url) is None:
+            print("NO DATE IN "+url)
+            return True
         return False
 
     def fixChars(self,string):
@@ -371,6 +384,7 @@ class NYTimes(BasicNewsRecipe):
 
     cover_tag = 'NY_NYT'
    def get_cover_url(self):
+        from datetime import timedelta, date
         cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
         br = BasicNewsRecipe.get_browser(self)
         daysback=1
@@ -393,7 +407,6 @@ class NYTimes(BasicNewsRecipe):
 
     masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
 
-
     def short_title(self):
         return self.title
 
@@ -655,75 +668,53 @@ class NYTimes(BasicNewsRecipe):
 
        soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
 
-        # Fetch the content table
-        content_table = soup.find('table',{'id':'content'})
-        if content_table is None:
-            self.log("FATAL ERROR: CANNOT FIND CONTENT TABLE")
-            return None
-
-        # Within this table are <td id=".*Column.*"> entries, each containing one or more h6 tags which represent sections
-
-        for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
-            for div_sec in td_col.findAll('div',recursive=False):
-                for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
-
-                    section_name = self.tag_to_string(h6_sec_name,use_alt=False)
-                    section_name = re.sub(r'^ *$','',section_name)
-
-                    if section_name == '':
-                        continue
-                    if self.includeSections != []:
-                        if section_name not in self.includeSections:
-                            print "SECTION NOT INCLUDED: ",section_name
-                            continue
-                    if section_name in self.excludeSections:
-                        print "SECTION EXCLUDED: ",section_name
-                        continue
-
-                    section_name=string.capwords(section_name)
-                    section_name = section_name.replace('Op-ed','Op-Ed')
-                    section_name = section_name.replace('U.s.','U.S.')
-                    section_name = section_name.replace('N.y.','N.Y.')
-                    pubdate = strftime('%a, %d %b')
-
-                    search_div = div_sec
-                    for next_tag in h6_sec_name.findNextSiblings(True):
-                        if next_tag.__class__.__name__ == 'Tag':
-                            if next_tag.name == 'div':
-                                search_div = next_tag
-                            break
-
-                    # Get the articles
-                    for h3_item in search_div.findAll('h3'):
-                        byline = h3_item.h6
-                        if byline is not None:
-                            author = self.tag_to_string(byline,use_alt=False)
-                        else:
-                            author = ''
-                        a = h3_item.find('a', href=True)
-                        if not a:
-                            continue
-                        url = re.sub(r'\?.*', '', a['href'])
-                        if self.exclude_url(url):
-                            continue
-                        url += '?pagewanted=all'
-                        if self.filterDuplicates:
-                            if url in self.url_list:
-                                continue
-                        self.url_list.append(url)
-                        title = self.tag_to_string(a, use_alt=True).strip()
-                        desc = h3_item.find('p')
-                        if desc is not None:
-                            description = self.tag_to_string(desc,use_alt=False)
-                        else:
-                            description = ''
-                        if not self.articles.has_key(section_name):
-                            self.ans.append(section_name)
-                            self.articles[section_name] = []
-                        self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
+        section_name='Unknown Section'
+        pubdate = strftime('%a, %d %b')
+        for td_col in soup.findAll('td'):
+            h6_sec_name = td_col.find('h6')
+            if h6_sec_name is not None:
+                new_section_name = self.tag_to_string(h6_sec_name,use_alt=False)
+                new_section_name = re.sub(r'^ *$','',new_section_name)
+                if new_section_name == '':
+                    continue
+                section_name = new_section_name
+                continue
+            atag = td_col.find('a')
+            if atag is not None:
+                h4tag = None
+                for h4tag in atag.findNextSiblings('h4'):
+                    break
+                if h4tag is None:
+                    continue
+                author = self.tag_to_string(h4tag,use_alt=False)
+                try:
+                    url = re.sub(r'\?.*', '', atag['href'])
+                except:
+                    continue
+                if self.exclude_url(url):
+                    continue
+                if '?' in url:
+                    url += '&pagewanted=all'
+                else:
+                    url += '?pagewanted=all'
+                if self.filterDuplicates:
+                    if url in self.url_list:
+                        continue
+                self.url_list.append(url)
+                title = self.tag_to_string(atag, use_alt=False).strip()
+                desc = atag.parent.find('p')
+                if desc is not None:
+                    description = self.tag_to_string(desc,use_alt=False)
+                else:
+                    description = ''
+                if not self.articles.has_key(section_name):
+                    self.ans.append(section_name)
+                    self.articles[section_name] = []
+                print('Title '+title+' author '+author)
+                self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
 
         self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
-        return self.filter_ans(self.get_tech_feeds(self.get_popular_articles(self.ans)))
+        return self.filter_ans(self.ans)
 
     def parse_index(self):
         if self.headlinesOnly:
@@ -833,8 +824,9 @@ class NYTimes(BasicNewsRecipe):
         for divr in soup.findAll('div',attrs={'class':re.compile('w190 right')}):
             if divr.find(text=re.compile('Sign up')):
                 divr.extract()
-        divr = soup.find('div',attrs={'id':re.compile('related-content')})
+        divr = soup.find('div',attrs={'class':re.compile('^relatedArticlesModule')})
         if divr is not None:
+            print("PROCESSING RELATED: "+self.tag_to_string(soup.title,False))
             # handle related articles
             rlist = []
             ul = divr.find('ul')
@@ -864,6 +856,8 @@ class NYTimes(BasicNewsRecipe):
                 asidediv.append(Tag(soup,'hr'))
                 smain = soup.find('body')
                 smain.append(asidediv)
+        else:
+            print("CANNOT FIND RELATED: "+self.tag_to_string(soup.title,False))
         for atag in soup.findAll('a'):
             img = atag.find('img')
             if img is not None:
@@ -906,6 +900,18 @@ class NYTimes(BasicNewsRecipe):
                         first_outer = outerdiv
                 else:
                     litag.extract()
+            for h6tag in rdiv.findAll('h6'):
+                if h6tag.find('a') is not None:
+                    if h6tag.find('a')['href'].startswith('http://www.nytimes.com'):
+                        url = re.sub(r'\?.*', '', h6tag.find('a')['href'])
+                        h6tag.find('a')['href'] = url+'?pagewanted=all'
+                        h6tag.extract()
+                        related.append(h6tag)
+                        if first_related is None:
+                            first_related = rdiv
+                            first_outer = outerdiv
+                    else:
+                        h6tag.extract()
             if related != []:
                 for r in related:
                     if r.h6: # don't want the anchor inside a h6 tag
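The get_cover_url() hunks above show only the added datetime import and the Newseum cover URL as context; the fallback loop driven by daysback lies outside the hunk. Below is a minimal, self-contained sketch of how such a date-stamped cover URL can be probed, stepping back one day at a time; the helper name, retry bound, and error handling are assumptions for illustration, not the recipe's actual code:

from datetime import date, timedelta

try:
    from urllib2 import urlopen          # Python 2, matching the recipe's vintage
except ImportError:
    from urllib.request import urlopen   # Python 3

def guess_cover_url(cover_tag='NY_NYT', max_days_back=7):
    # Build the Newseum front-page image URL for today and, if it is not
    # available, fall back to the previous day's image (hypothetical helper).
    day = date.today()
    for _ in range(max_days_back):
        url = ('http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'
               + str(day.day) + '/lg/' + cover_tag + '.jpg')
        try:
            urlopen(url).close()   # any HTTP error means no cover for that day
            return url
        except Exception:
            day -= timedelta(days=1)
    return None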