Fix Smashing Magazine

This commit is contained in:
Kovid Goyal 2012-07-21 21:28:59 +05:30
parent f7ab95140e
commit 7d1cf168d5

View File

@ -1,50 +1,24 @@
#!/usr/bin/env python # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
www.smashingmagazine.com
'''
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class SmashingMagazine(BasicNewsRecipe): class SmashingMagazine (BasicNewsRecipe):
title = 'Smashing Magazine' __author__ = 'Marc Busqué <marc@lamarciana.com>'
__author__ = 'Darko Miletic' __url__ = 'http://www.lamarciana.com'
description = 'We smash you with the information that will make your life easier, really' __version__ = '1.0.1'
oldest_article = 20 __license__ = 'GPL v3'
__copyright__ = '2012, Marc Busqué <marc@lamarciana.com>'
title = u'Smashing Magazine'
description = u'Founded in September 2006, Smashing Magazine delivers useful and innovative information to Web designers and developers. Our aim is to inform our readers about the latest trends and techniques in Web development. We try to persuade you not with the quantity but with the quality of the information we present. Smashing Magazine is and always has been independent.'
language = 'en' language = 'en'
max_articles_per_feed = 100 tags = 'web development, software'
oldest_article = 7
remove_empty_feeds = True
no_stylesheets = True no_stylesheets = True
use_embedded_content = False encoding = 'utf8'
publisher = 'Smashing Magazine' cover_url = u'http://media.smashingmagazine.com/themes/smashingv4/images/logo.png'
category = 'news, web, IT, css, javascript, html' remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height', 'style']
encoding = 'utf-8' extra_css = u'body div table:first-child {display: none;} img {max-width: 100%; display: block; margin: auto;}'
conversion_options = { feeds = [
'comments' : description (u'Smashing Magazine', u'http://rss1.smashingmagazine.com/feed/'),
,'tags' : category
,'publisher' : publisher
}
keep_only_tags = [dict(name='div', attrs={'id':'leftcolumn'})]
remove_tags_after = dict(name='ul',attrs={'class':'social'})
remove_tags = [
dict(name=['link','object'])
,dict(name='h1',attrs={'class':'logo'})
,dict(name='div',attrs={'id':'booklogosec'})
,dict(attrs={'src':'http://media2.smashingmagazine.com/wp-content/uploads/images/the-smashing-book/smbook6.gif'})
] ]
feeds = [(u'Articles', u'http://rss1.smashingmagazine.com/feed/')]
def preprocess_html(self, soup):
for iter in soup.findAll('div',attrs={'class':'leftframe'}):
it = iter.find('h1')
if it == None:
iter.extract()
for item in soup.findAll('img'):
oldParent = item.parent
if oldParent.name == 'a':
oldParent.name = 'div'
return soup