recipes: fix errors and styles

This commit is contained in:
Hiroshi Miura 2010-12-08 23:43:54 +09:00
parent 4d7bab28b6
commit 4e4c2b7e68
2 changed files with 27 additions and 19 deletions

View File

@@ -14,7 +14,7 @@ class TheHeiseOnline(BasicNewsRecipe):
oldest_article = 3 oldest_article = 3
description = 'In association with Heise Online' description = 'In association with Heise Online'
publisher = 'Heise Media UK Ltd.' publisher = 'Heise Media UK Ltd.'
category = 'news, technology, security' category = 'news, technology, security, OSS, internet'
max_articles_per_feed = 100 max_articles_per_feed = 100
language = 'en' language = 'en'
encoding = 'utf-8' encoding = 'utf-8'
@@ -27,6 +27,12 @@ class TheHeiseOnline(BasicNewsRecipe):
feeds = [ feeds = [
(u'The H News Feed', u'http://www.h-online.com/news/atom.xml') (u'The H News Feed', u'http://www.h-online.com/news/atom.xml')
] ]
cover_url = 'http://www.h-online.com/icons/logo_theH.gif'
remove_tags = [
dict(id="logo"),
dict(id="footer")
]
def print_version(self, url): def print_version(self, url):
return url + '?view=print' return url + '?view=print'

View File

@@ -17,17 +17,27 @@ class Toyokeizai(BasicNewsRecipe):
category = 'economy, magazine, japan' category = 'economy, magazine, japan'
language = 'ja' language = 'ja'
encoding = 'euc-jp' encoding = 'euc-jp'
index = 'http://www.toyokeizai.net/news/' index = 'http://member.toyokeizai.net/news/'
remove_javascript = True remove_javascript = True
no_stylesheet = True no_stylesheets = True
masthead_title = u'TOYOKEIZAI' masthead_title = u'TOYOKEIZAI'
needs_subscription = True needs_subscription = True
timefmt = '[%y/%m/%d]' timefmt = '[%y/%m/%d]'
recursions = 5
match_regexps =[ r'page/\d+']
keep_only_tags = [dict(name='div', attrs={'class':['news']}), keep_only_tags = [
dict(name='div', attrs={'class':["news_con"]}) dict(name='div', attrs={'class':['news']}),
dict(name='div', attrs={'class':["news_cont"]}),
dict(name='div', attrs={'class':["news_con"]}),
# dict(name='div', attrs={'class':["norightsMessage"]})
]
remove_tags = [{'class':"mt35 mgz"},
{'class':"mt20 newzia"},
{'class':"mt20 fontS"},
{'class':"bk_btn_m"},
dict(id='newzia_connect_member')
] ]
remove_tags = [{'class':"mt35 mgz"}]
def parse_index(self): def parse_index(self):
feeds = [] feeds = []
@@ -41,9 +51,8 @@ class Toyokeizai(BasicNewsRecipe):
newsarticles.append({ newsarticles.append({
'title' :itema.string 'title' :itema.string
,'date' :re.compile(r"\- ").sub("",itemd.string) ,'date' :re.compile(r"\- ").sub("",itemd.string)
,'url' :'http://www.toyokeizai.net' + itema['href'] ,'url' :'http://member.toyokeizai.net' + itema['href']
# ,'description':itema['title'] ,'description':itema['title']
,'description':''
}) })
feeds.append(('news', newsarticles)) feeds.append(('news', newsarticles))
return feeds return feeds
@@ -57,10 +66,3 @@ class Toyokeizai(BasicNewsRecipe):
br['password'] = self.password br['password'] = self.password
res = br.submit() res = br.submit()
return br return br
def is_link_wanted(url,tag):
if re.compile(r'page//[0-9]+//$').search(url):
return True
return False