mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix Newsweek recipe
This commit is contained in:
parent
0a840e9e2b
commit
f457341a36
@ -62,7 +62,9 @@ class Economist(BasicNewsRecipe):
|
||||
a = tag.find('a', href=True)
|
||||
if a is not None:
|
||||
url=a['href'].replace('displaystory', 'PrinterFriendly').strip()
|
||||
if url.startswith('/') or url.startswith('PrinterF'):
|
||||
if url.startswith('Printer'):
|
||||
url = '/'+url
|
||||
if url.startswith('/'):
|
||||
url = 'http://www.economist.com' + url
|
||||
try:
|
||||
subtitle = tag.previousSibling.contents[0].contents[0]
|
||||
|
@ -89,6 +89,17 @@ class Newsweek(BasicNewsRecipe):
|
||||
return cmp(tx, ty)
|
||||
return sorted(ans, cmp=fcmp)
|
||||
|
||||
def ensure_html(self, soup):
    """Return a soup whose root element is ``<html>``.

    If the first tag found in ``soup`` is already ``<html>``, ``soup`` is
    returned unchanged. Otherwise a new ``<html><head/><body/></html>``
    skeleton is built and every top-level tag of ``soup`` is moved into
    its ``<body>``, preserving order. Top-level nodes without a ``name``
    attribute value (e.g. bare text) are not moved.

    :param soup: parsed document to normalize.
    :return: ``soup`` itself, or the newly built wrapper soup.
    """
    root = soup.find(name=True)
    # find() yields None when the soup holds no tags at all; in that case
    # fall through and return the empty skeleton instead of failing on
    # ``root.name``.
    if root is not None and root.name == 'html':
        return soup
    nsoup = BeautifulSoup('<html><head></head><body/></html>')
    nroot = nsoup.find(name='body')
    # Iterate over a snapshot: extract() removes the node from
    # soup.contents, and mutating that list while iterating over it
    # would skip the sibling following each moved tag.
    for x in list(soup.contents):
        if getattr(x, 'name', False):
            x.extract()
            nroot.insert(len(nroot), x)
    return nsoup
|
||||
|
||||
def postprocess_html(self, soup, first_fetch):
|
||||
if not first_fetch:
|
||||
h1 = soup.find(id='headline')
|
||||
@ -99,7 +110,7 @@ class Newsweek(BasicNewsRecipe):
|
||||
div.extract()
|
||||
divs = list(soup.findAll('div', 'pagination'))
|
||||
if not divs:
|
||||
return soup
|
||||
return self.ensure_html(soup)
|
||||
for div in divs[1:]: div.extract()
|
||||
all_a = divs[0].findAll('a', href=True)
|
||||
divs[0]['style']="display:none"
|
||||
@ -109,7 +120,7 @@ class Newsweek(BasicNewsRecipe):
|
||||
for a in soup.findAll('a', href=test):
|
||||
if a not in all_a:
|
||||
del a['href']
|
||||
return soup
|
||||
return self.ensure_html(soup)
|
||||
|
||||
def get_current_issue(self):
|
||||
soup = self.index_to_soup('http://www.newsweek.com')
|
||||
|
Loading…
x
Reference in New Issue
Block a user