Various Japanese news sources, National Geographic and paper.li, by Hiroshi Miura
commit 429e477674
resources/recipes/ajiajin.recipe (new file, 23 lines)
@@ -0,0 +1,23 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
ajiajin.com/blog
'''

from calibre.web.feeds.news import BasicNewsRecipe

class AjiajinBlog(BasicNewsRecipe):
    title                 = u'Ajiajin blog'
    __author__            = 'Hiroshi Miura'
    oldest_article        = 5
    publication_type      = 'blog'
    max_articles_per_feed = 100
    description           = 'The next generation internet trends in Japan and Asia'
    publisher             = ''
    category              = 'internet, asia, japan'
    language              = 'en'
    encoding              = 'utf-8'

    feeds = [(u'blog', u'http://feeds.feedburner.com/Asiajin')]
resources/recipes/chouchoublog.recipe (new file, 37 lines)
@@ -0,0 +1,37 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
http://ameblo.jp/
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class SakuraBlog(BasicNewsRecipe):
    title                 = u'chou chou blog'
    __author__            = 'Hiroshi Miura'
    oldest_article        = 4
    publication_type      = 'blog'
    max_articles_per_feed = 20
    description           = 'popular Japanese dog blog'
    publisher             = ''
    category              = 'dog, pet, japan'
    language              = 'ja'
    encoding              = 'utf-8'
    use_embedded_content  = True

    feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/chouchou1218/rss20.xml')]

    def parse_feeds(self):
        # Drop advertisement entries that rssad.jp injects into the feed.
        feeds = BasicNewsRecipe.parse_feeds(self)
        for curfeed in feeds:
            delList = []
            for curarticle in curfeed.articles:
                if re.search(r'rssad\.jp', curarticle.url):
                    delList.append(curarticle)
            for d in delList:
                curfeed.articles.remove(d)
        return feeds
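A refactoring sketch, not part of this commit (the class name is hypothetical): the rssad.jp filter above is repeated verbatim in the uninohimitu recipe later in this commit, and both recipes could inherit it from a small shared base class:

import re
from calibre.web.feeds.news import BasicNewsRecipe

class AmebloAdFilterRecipe(BasicNewsRecipe):
    # Hypothetical shared base: drop feed entries whose URL points at
    # the rssad.jp ad redirector instead of a real blog post.
    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
        for curfeed in feeds:
            curfeed.articles[:] = [a for a in curfeed.articles
                                   if not re.search(r'rssad\.jp', a.url)]
        return feeds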
resources/recipes/kahokushinpo.recipe (new file, 31 lines)
@@ -0,0 +1,31 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.kahoku.co.jp
'''

from calibre.web.feeds.news import BasicNewsRecipe

class KahokuShinpoNews(BasicNewsRecipe):
    title                 = u'\u6cb3\u5317\u65b0\u5831'
    __author__            = 'Hiroshi Miura'
    oldest_article        = 2
    max_articles_per_feed = 20
    description           = 'Tohoku regional newspaper in Japan'
    publisher             = 'Kahoku Shinpo Sha'
    category              = 'news, japan'
    language              = 'ja'
    encoding              = 'Shift_JIS'
    no_stylesheets        = True

    feeds = [(u'news', u'http://www.kahoku.co.jp/rss/index_thk.xml')]

    keep_only_tags = [dict(id="page_title"),
                      dict(id="news_detail"),
                      dict(id="bt_title"),
                      {'class': "photoLeft"},
                      dict(id="bt_body")
                     ]
    remove_tags = [{'class': "button"}]
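For reference (illustrative only, using the BeautifulSoup bundled with calibre): each entry in keep_only_tags and remove_tags acts as an attribute filter, so dict(id="bt_body") and {'class': "photoLeft"} behave alike; a sketch of the matching they perform:

from calibre.ebooks.BeautifulSoup import BeautifulSoup

html = '<div id="page_title">head</div><div class="button">share</div>'
soup = BeautifulSoup(html)
print(soup.find(attrs={'id': 'page_title'}))  # matched: kept by keep_only_tags
print(soup.find(attrs={'class': 'button'}))   # matched: stripped by remove_tags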
resources/recipes/nationalgeographic.recipe (new file, 38 lines)
@@ -0,0 +1,38 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
nationalgeographic.com
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class NationalGeographicNews(BasicNewsRecipe):
    title                 = u'National Geographic News'
    oldest_article        = 7
    max_articles_per_feed = 100
    remove_javascript     = True
    no_stylesheets        = True
    use_embedded_content  = False

    feeds = [(u'news', u'http://feeds.nationalgeographic.com/ng/News/News_Main')]

    remove_tags_before = dict(id='page_head')
    remove_tags_after  = [dict(id='social_buttons'), {'class': 'aside'}]
    remove_tags        = [{'class': 'hidden'}]

    def parse_feeds(self):
        # Drop advertisement entries that ads.pheedo.com injects into the feed.
        feeds = BasicNewsRecipe.parse_feeds(self)
        for curfeed in feeds:
            delList = []
            for curarticle in curfeed.articles:
                if re.search(r'ads\.pheedo\.com', curarticle.url):
                    delList.append(curarticle)
            for d in delList:
                curfeed.articles.remove(d)
        return feeds
resources/recipes/nationalgeographicjp.recipe (new file, 20 lines)
@@ -0,0 +1,20 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
nationalgeographic.co.jp
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class NationalGeoJp(BasicNewsRecipe):
    title                 = u'\u30ca\u30b7\u30e7\u30ca\u30eb\u30fb\u30b8\u30aa\u30b0\u30e9\u30d5\u30a3\u30c3\u30af\u30cb\u30e5\u30fc\u30b9'
    oldest_article        = 7
    max_articles_per_feed = 100
    no_stylesheets        = True

    feeds = [(u'news', u'http://www.nationalgeographic.co.jp/news/rss.php')]

    def print_version(self, url):
        # Swap each article page for its printer-friendly version.
        return re.sub(r'news_article\.php', 'news_printer_friendly.php', url)
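A usage illustration of the print_version() hook (the example URL shape is an assumption, not taken from the site):

import re

# Hypothetical article URL, used only to show the rewrite:
url = 'http://www.nationalgeographic.co.jp/news/news_article.php?file_id=123'
print(re.sub(r'news_article\.php', 'news_printer_friendly.php', url))
# -> http://www.nationalgeographic.co.jp/news/news_printer_friendly.php?file_id=123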
@@ -10,8 +10,8 @@ import mechanize
 from calibre.ptempfile import PersistentTemporaryFile


-class NikkeiNet_sub_life(BasicNewsRecipe):
-    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
+class NikkeiNet_sub_shakai(BasicNewsRecipe):
+    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Social)'
     __author__  = 'Hiroshi Miura'
     description = 'News and current market affairs from Japan'
     cover_url   = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
resources/recipes/paperli_topic.recipe (new file, 58 lines)
@@ -0,0 +1,58 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
paper.li
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime

class paperli_topics(BasicNewsRecipe):
    # Customize this recipe: change paperli_tag and title below to
    # download news on your favorite tag.
    paperli_tag           = 'climate'
    title                 = u'The #climate Daily - paperli'
    #-------------------------------------------------------------
    __author__            = 'Hiroshi Miura'
    oldest_article        = 7
    max_articles_per_feed = 100
    description           = 'paper.li page about ' + paperli_tag
    publisher             = 'paper.li'
    category              = 'paper.li'
    language              = 'en'
    encoding              = 'utf-8'
    remove_javascript     = True
    masthead_title        = u'The ' + paperli_tag + ' Daily'
    timefmt               = '[%y/%m/%d]'
    base_url              = 'http://paper.li'
    index                 = base_url + '/tag/' + paperli_tag

    def parse_index(self):
        # Collect the topic links from the navigation bar at the bottom of the page.
        topics = []
        soup = self.index_to_soup(self.index)
        topics_lists = soup.find('div', attrs={'class': 'paper-nav-bottom'})
        for item in topics_lists.findAll('li', attrs={'class': ""}):
            itema = item.find('a', href=True)
            topics.append({'title': itema.string, 'url': itema['href']})

        # Build one feed per topic from the top stories on each topic page.
        feeds = []
        for topic in topics:
            newsarticles = []
            soup = self.index_to_soup(''.join([self.base_url, topic['url']]))
            topstories = soup.findAll('div', attrs={'class': 'yui-u'})
            for itt in topstories:
                itema = itt.find('a', href=True, attrs={'class': 'ts'})
                if itema is not None:
                    itemd = itt.find('div', text=True, attrs={'class': 'text'})
                    newsarticles.append({
                        'title':       itema.string,
                        'date':        strftime(self.timefmt),
                        'url':         itema['href'],
                        'description': itemd.string,
                    })
            feeds.append((topic['title'], newsarticles))
        return feeds
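Unlike the feed-based recipes above, this one overrides parse_index(), which calibre expects to return a list of (feed title, article list) tuples, each article being a dict. A minimal sketch of that shape, with placeholder values:

# Placeholder data showing the structure parse_index() returns:
feeds = [
    (u'Technology', [
        {'title':       u'Example story',
         'url':         'http://paper.li/example-story',  # placeholder
         'date':        '[10/12/01]',
         'description': u'one-line summary'},
    ]),
]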
resources/recipes/uninohimitu.recipe (new file, 36 lines)
@@ -0,0 +1,36 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
http://ameblo.jp/sauta19/
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class UniNoHimituKichiBlog(BasicNewsRecipe):
    title                 = u'Uni secret base'
    __author__            = 'Hiroshi Miura'
    oldest_article        = 2
    publication_type      = 'blog'
    max_articles_per_feed = 20
    description           = 'famous Japanese cat blog'
    publisher             = ''
    category              = 'cat, pet, japan'
    language              = 'ja'
    encoding              = 'utf-8'

    feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/sauta19/rss20.xml')]

    def parse_feeds(self):
        # Drop advertisement entries that rssad.jp injects into the feed.
        feeds = BasicNewsRecipe.parse_feeds(self)
        for curfeed in feeds:
            delList = []
            for curarticle in curfeed.articles:
                if re.search(r'rssad\.jp', curarticle.url):
                    delList.append(curarticle)
            for d in delList:
                curfeed.articles.remove(d)
        return feeds