Improved recipe for Slate

This commit is contained in:
Kovid Goyal 2009-12-08 07:44:10 -07:00
parent 750f9d25b1
commit b105b4cdbc

View File

@@ -14,7 +14,7 @@ class PeriodicalNameHere(BasicNewsRecipe):
# Method variables for customizing downloads # Method variables for customizing downloads
title = 'Slate' title = 'Slate'
description = 'A general-interest publication offering analysis and commentary about politics, news and culture.' description = 'A general-interest publication offering analysis and commentary about politics, news and culture.'
__author__ = 'GRiker' __author__ = 'GRiker and Sujata Raman'
max_articles_per_feed = 20 max_articles_per_feed = 20
oldest_article = 7.0 oldest_article = 7.0
recursions = 0 recursions = 0
@@ -50,8 +50,7 @@ class PeriodicalNameHere(BasicNewsRecipe):
# The second entry is for 'Big Money', which comes from a different site, uses different markup # The second entry is for 'Big Money', which comes from a different site, uses different markup
remove_tags = [dict(attrs={ 'id':['toolbox','recommend_tab','insider_ad_wrapper', remove_tags = [dict(attrs={ 'id':['toolbox','recommend_tab','insider_ad_wrapper',
'article_bottom_tools_cntr','fray_article_discussion', 'article_bottom_tools_cntr','fray_article_discussion', 'fray_article_links','bottom_sponsored_links','author_bio',
'fray_article_links','bottom_sponsored_links','author_bio',
'bizbox_links_bottom','ris_links_wrapper','BOXXLE']}), 'bizbox_links_bottom','ris_links_wrapper','BOXXLE']}),
dict(attrs={ 'id':['content-top','service-links-bottom','hed']}) ] dict(attrs={ 'id':['content-top','service-links-bottom','hed']}) ]
@@ -60,18 +59,20 @@ class PeriodicalNameHere(BasicNewsRecipe):
excludedAuthorKeywords = [] excludedAuthorKeywords = []
excludedContentKeywords = ['http://twitter.com/Slate'] excludedContentKeywords = ['http://twitter.com/Slate']
extra_css = '.headline {text-align:left;}\n\ extra_css = '''
.byline {font-family: monospace; \ .h1_subhead{font-family:Arial; font-size:small; }
text-align: left;\ h1{font-family:Verdana; font-size:large; }
margin-bottom: 0px;}\n\ .byline {font-family:Georgia; margin-bottom: 0px; color: #660033;}
.dateline {text-align: left; \ .dateline {font-family:Arial; font-size: smaller; height: 0pt; color:#666666;}
font-size: smaller;\ .imagewrapper {font-family:Verdana;font-size:x-small; }
height: 0pt;}\n\ .source {font-family:Verdana; font-size:x-small;}
.imagewrapper {text-align: center;}\n\ .credit {font-family:Verdana; font-size: smaller;}
.source {text-align: left;}\n\ #article_body {font-family:Verdana; }
.credit {text-align: right;\ #content {font-family:Arial; }
font-size: smaller;}\n\ .caption{font-family:Verdana;font-style:italic; font-size:x-small;}
.article_body {text-align: left;}\n' h3{font-family:Arial; color:#666666; font-size:small}
a{color:#0066CC;}
'''
# Local variables to extend class # Local variables to extend class
baseURL = 'http://slate.com' baseURL = 'http://slate.com'
@@ -339,6 +340,9 @@ class PeriodicalNameHere(BasicNewsRecipe):
# Change <h1> to <h2> # Change <h1> to <h2>
headline = soup.find("h1") headline = soup.find("h1")
tag = headline.find("span")
tag.name = 'div'
if headline is not None : if headline is not None :
h2tag = Tag(soup, "h2") h2tag = Tag(soup, "h2")
h2tag['class'] = "headline" h2tag['class'] = "headline"
@@ -348,8 +352,8 @@ class PeriodicalNameHere(BasicNewsRecipe):
result += substr result += substr
if i < len(strs) -1 : if i < len(strs) -1 :
result += '<br />' result += '<br />'
h2tag.insert(0, result) #h2tag.insert(0, result)
headline.replaceWith(h2tag) #headline.replaceWith(h2tag)
# Fix up the concatenated byline and dateline # Fix up the concatenated byline and dateline
byline = soup.find(True,attrs={'class':'byline'}) byline = soup.find(True,attrs={'class':'byline'})