Fix #1093286 (Updated recipe for NSFW corp)

This commit is contained in:
Kovid Goyal 2012-12-24 08:03:48 +05:30
parent 28639d8f43
commit 52faba3c23

View File

@ -6,7 +6,6 @@ www.nsfwcorp.com
''' '''
import urllib import urllib
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class NotSafeForWork(BasicNewsRecipe): class NotSafeForWork(BasicNewsRecipe):
@ -21,8 +20,9 @@ class NotSafeForWork(BasicNewsRecipe):
needs_subscription = True needs_subscription = True
auto_cleanup = False auto_cleanup = False
INDEX = 'https://www.nsfwcorp.com' INDEX = 'https://www.nsfwcorp.com'
LOGIN = INDEX + '/login' LOGIN = INDEX + '/login/target/'
use_embedded_content = False SETTINGS = INDEX + '/settings/'
use_embedded_content = True
language = 'en' language = 'en'
publication_type = 'magazine' publication_type = 'magazine'
masthead_url = 'http://assets.nsfwcorp.com/media/headers/nsfw_banner.jpg' masthead_url = 'http://assets.nsfwcorp.com/media/headers/nsfw_banner.jpg'
@ -46,15 +46,6 @@ class NotSafeForWork(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
remove_tags_before = dict(attrs={'id':'fromToLine'})
remove_tags_after = dict(attrs={'id':'unlockButtonDiv'})
remove_tags=[
dict(name=['meta', 'link', 'iframe', 'embed', 'object'])
,dict(name='a', attrs={'class':'switchToDeskNotes'})
,dict(attrs={'id':'unlockButtonDiv'})
]
remove_attributes = ['lang']
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
br.open(self.LOGIN) br.open(self.LOGIN)
@ -65,30 +56,12 @@ class NotSafeForWork(BasicNewsRecipe):
br.open(self.LOGIN, data) br.open(self.LOGIN, data)
return br return br
def parse_index(self): def get_feeds(self):
articles = [] self.feeds = []
soup = self.index_to_soup(self.INDEX) soup = self.index_to_soup(self.SETTINGS)
dispatches = soup.find(attrs={'id':'dispatches'}) for item in soup.findAll('input', attrs={'type':'text'}):
if dispatches: if item.has_key('value') and item['value'].startswith('http://www.nsfwcorp.com/feed/'):
for item in dispatches.findAll('h3'): self.feeds.append(item['value'])
description = u'' return self.feeds
title_link = item.find('span', attrs={'class':'dispatchTitle'}) return self.feeds
description_link = item.find('span', attrs={'class':'dispatchSubtitle'})
feed_link = item.find('a', href=True)
if feed_link:
url = self.INDEX + feed_link['href']
title = self.tag_to_string(title_link)
description = self.tag_to_string(description_link)
date = strftime(self.timefmt)
articles.append({
'title' :title
,'date' :date
,'url' :url
,'description':description
})
return [('Dispatches', articles)]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup