mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Improved Newsweek recipe (thanks to GRiker)
This commit is contained in:
parent
24f1aa4d5a
commit
d0a1ce4825
@ -4,6 +4,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import re
|
||||
from calibre import strftime
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Newsweek(BasicNewsRecipe):
|
||||
@ -128,3 +129,39 @@ class Newsweek(BasicNewsRecipe):
|
||||
return cover_url
|
||||
|
||||
|
||||
def postprocess_book(self, oeb, opts, log):
    """Fill in missing author and description fields of the TOC articles.

    Walks every article of every section in ``oeb.toc``. An article whose
    ``author`` is None gets one built from the page's byline and issue
    date; an article whose ``description`` is None gets one taken from the
    page's description meta tag or, failing that, from its first paragraph.

    ``opts`` and ``log`` are accepted but not used here.
    """

    # Boilerplate description Newsweek puts on pages without a real summary.
    generic_prefix = 'Newsweek magazine online plus'

    def class_text(soup, css_class):
        # Text of the first element carrying css_class, or '' if absent.
        tag = soup.find(True, attrs={'class': css_class})
        return self.tag_to_string(tag) if tag is not None else ''

    def extract_byline(soup):
        # "<byline> | <issue date>", or whichever of the two is present.
        byline = class_text(soup, 'authorInfo')
        issue_date = class_text(soup, 'issueDate').replace(',', '')
        return ' | '.join(part for part in (byline, issue_date) if part)

    def extract_description(soup):
        # Prefer the description meta tag unless it is missing or generic.
        meta = soup.find(True, attrs={'name': 'description'})
        content = meta.get('content') if meta is not None else None
        if content is not None and not content.startswith(generic_prefix):
            return content
        # Fall back to the first paragraph of the document.
        # NOTE(review): the previous code also looked up the element with
        # class 'story' here but discarded the result; if the paragraph was
        # meant to come from inside that element, scope this search to it.
        first_para = soup.find('p')
        return self.tag_to_string(first_para) if first_para is not None else ''

    for section in oeb.toc:
        for article in section:
            if article.author is not None and article.description is not None:
                continue
            # Parse the page once and share it between both extractions.
            soup = BeautifulSoup(str(oeb.manifest.hrefs[article.href]))
            if article.author is None:
                article.author = extract_byline(soup)
            if article.description is None:
                article.description = extract_description(soup)
|
||||
|
Loading…
x
Reference in New Issue
Block a user