mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-04 03:27:00 -05:00 
			
		
		
		
	Various Japanese news sources, National Geographic and paper.li by Hiroshi Miura
This commit is contained in:
		
						commit
						429e477674
					
				
							
								
								
									
										23
									
								
								resources/recipes/ajiajin.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								resources/recipes/ajiajin.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,23 @@
 | 
				
			|||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					ajiajin.com/blog
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class AjiajinBlog(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    title          = u'Ajiajin blog'
 | 
				
			||||||
 | 
					    __author__     = 'Hiroshi Miura'
 | 
				
			||||||
 | 
					    oldest_article = 5
 | 
				
			||||||
 | 
					    publication_type = 'blog'
 | 
				
			||||||
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
 | 
					    description    = 'The next generation internet trends in Japan and Asia'
 | 
				
			||||||
 | 
					    publisher      = ''
 | 
				
			||||||
 | 
					    category       = 'internet, asia, japan'
 | 
				
			||||||
 | 
					    language       = 'en'
 | 
				
			||||||
 | 
					    encoding      = 'utf-8'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    feeds          = [(u'blog', u'http://feeds.feedburner.com/Asiajin')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										37
									
								
								resources/recipes/chouchoublog.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								resources/recipes/chouchoublog.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,37 @@
 | 
				
			|||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					http://ameblo.jp/
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class SakuraBlog(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    title          = u'chou chou blog'
 | 
				
			||||||
 | 
					    __author__     = 'Hiroshi Miura'
 | 
				
			||||||
 | 
					    oldest_article = 4
 | 
				
			||||||
 | 
					    publication_type = 'blog'
 | 
				
			||||||
 | 
					    max_articles_per_feed = 20
 | 
				
			||||||
 | 
					    description    = 'Japanese popular dog blog'
 | 
				
			||||||
 | 
					    publisher      = ''
 | 
				
			||||||
 | 
					    category       = 'dog, pet, japan'
 | 
				
			||||||
 | 
					    language       = 'ja'
 | 
				
			||||||
 | 
					    encoding      = 'utf-8'
 | 
				
			||||||
 | 
					    use_embedded_content = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    feeds          = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/chouchou1218/rss20.xml')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def parse_feeds(self):
 | 
				
			||||||
 | 
					        feeds = BasicNewsRecipe.parse_feeds(self)
 | 
				
			||||||
 | 
					        for curfeed in feeds:
 | 
				
			||||||
 | 
					            delList = []
 | 
				
			||||||
 | 
					            for a,curarticle in enumerate(curfeed.articles):
 | 
				
			||||||
 | 
					                if re.search(r'rssad.jp', curarticle.url):
 | 
				
			||||||
 | 
					                    delList.append(curarticle)
 | 
				
			||||||
 | 
					            if len(delList)>0:
 | 
				
			||||||
 | 
					                for d in delList:
 | 
				
			||||||
 | 
					                    index = curfeed.articles.index(d)
 | 
				
			||||||
 | 
					                    curfeed.articles[index:index+1] = []
 | 
				
			||||||
 | 
					        return feeds
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										31
									
								
								resources/recipes/kahokushinpo.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								resources/recipes/kahokushinpo.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,31 @@
 | 
				
			|||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					www.kahoku.co.jp
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class KahokuShinpoNews(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    title          = u'\u6cb3\u5317\u65b0\u5831'
 | 
				
			||||||
 | 
					    __author__     = 'Hiroshi Miura'
 | 
				
			||||||
 | 
					    oldest_article = 2
 | 
				
			||||||
 | 
					    max_articles_per_feed = 20
 | 
				
			||||||
 | 
					    description    = 'Tohoku regional news paper in Japan'
 | 
				
			||||||
 | 
					    publisher      = 'Kahoku Shinpo Sha'
 | 
				
			||||||
 | 
					    category       = 'news, japan'
 | 
				
			||||||
 | 
					    language       = 'ja'
 | 
				
			||||||
 | 
					    encoding      = 'Shift_JIS'
 | 
				
			||||||
 | 
					    no_stylesheets = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    feeds          = [(u'news', u'http://www.kahoku.co.jp/rss/index_thk.xml')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    keep_only_tags = [ dict(id="page_title"),
 | 
				
			||||||
 | 
					                                   dict(id="news_detail"),
 | 
				
			||||||
 | 
					                                   dict(id="bt_title"),
 | 
				
			||||||
 | 
					                                   {'class':"photoLeft"},
 | 
				
			||||||
 | 
					                                   dict(id="bt_body")
 | 
				
			||||||
 | 
					                                 ]
 | 
				
			||||||
 | 
					    remove_tags = [ {'class':"button"}]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										38
									
								
								resources/recipes/nationalgeographic.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								resources/recipes/nationalgeographic.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,38 @@
 | 
				
			|||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					nationalgeographic.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class NationalGeographicNews(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    title          = u'National Geographic News'
 | 
				
			||||||
 | 
					    oldest_article = 7
 | 
				
			||||||
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
 | 
					    remove_javascript = True
 | 
				
			||||||
 | 
					    no_stylesheets = True
 | 
				
			||||||
 | 
					    use_embedded_content = False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    feeds          = [(u'news', u'http://feeds.nationalgeographic.com/ng/News/News_Main')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    remove_tags_before = dict(id='page_head')
 | 
				
			||||||
 | 
					    remove_tags_after = [dict(id='social_buttons'),{'class':'aside'}]
 | 
				
			||||||
 | 
					    remove_tags = [
 | 
				
			||||||
 | 
					                       {'class':'hidden'}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                     ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def parse_feeds(self):
 | 
				
			||||||
 | 
					        feeds = BasicNewsRecipe.parse_feeds(self)
 | 
				
			||||||
 | 
					        for curfeed in feeds:
 | 
				
			||||||
 | 
					            delList = []
 | 
				
			||||||
 | 
					            for a,curarticle in enumerate(curfeed.articles):
 | 
				
			||||||
 | 
					                if re.search(r'ads\.pheedo\.com', curarticle.url):
 | 
				
			||||||
 | 
					                    delList.append(curarticle)
 | 
				
			||||||
 | 
					            if len(delList)>0:
 | 
				
			||||||
 | 
					                for d in delList:
 | 
				
			||||||
 | 
					                    index = curfeed.articles.index(d)
 | 
				
			||||||
 | 
					                    curfeed.articles[index:index+1] = []
 | 
				
			||||||
 | 
					        return feeds
 | 
				
			||||||
							
								
								
									
										20
									
								
								resources/recipes/nationalgeographicjp.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								resources/recipes/nationalgeographicjp.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,20 @@
 | 
				
			|||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					nationalgeographic.co.jp
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class NationalGeoJp(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    title          = u'\u30ca\u30b7\u30e7\u30ca\u30eb\u30fb\u30b8\u30aa\u30b0\u30e9\u30d5\u30a3\u30c3\u30af\u30cb\u30e5\u30fc\u30b9'
 | 
				
			||||||
 | 
					    oldest_article = 7
 | 
				
			||||||
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
 | 
					    no_stylesheets = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    feeds          = [(u'news', u'http://www.nationalgeographic.co.jp/news/rss.php')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def print_version(self, url):
 | 
				
			||||||
 | 
					        return re.sub(r'news_article.php','news_printer_friendly.php', url)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -10,8 +10,8 @@ import mechanize
 | 
				
			|||||||
from calibre.ptempfile import PersistentTemporaryFile
 | 
					from calibre.ptempfile import PersistentTemporaryFile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class NikkeiNet_sub_life(BasicNewsRecipe):
 | 
					class NikkeiNet_sub_shakai(BasicNewsRecipe):
 | 
				
			||||||
    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
 | 
					    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Social)'
 | 
				
			||||||
    __author__      = 'Hiroshi Miura'
 | 
					    __author__      = 'Hiroshi Miura'
 | 
				
			||||||
    description     = 'News and current market affairs from Japan'
 | 
					    description     = 'News and current market affairs from Japan'
 | 
				
			||||||
    cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
 | 
					    cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										58
									
								
								resources/recipes/paperli_topic.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								resources/recipes/paperli_topic.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,58 @@
 | 
				
			|||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					paperli
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					from calibre import strftime
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class paperli_topics(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    # Customize this recipe and change paperli_tag and title below to
 | 
				
			||||||
 | 
					    # download news on your favorite tag
 | 
				
			||||||
 | 
					    paperli_tag = 'climate'
 | 
				
			||||||
 | 
					    title          = u'The #climate Daily - paperli'
 | 
				
			||||||
 | 
					#-------------------------------------------------------------
 | 
				
			||||||
 | 
					    __author__     = 'Hiroshi Miura'
 | 
				
			||||||
 | 
					    oldest_article = 7
 | 
				
			||||||
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
 | 
					    description    = 'paper.li page about '+ paperli_tag
 | 
				
			||||||
 | 
					    publisher      = 'paper.li'
 | 
				
			||||||
 | 
					    category       = 'paper.li'
 | 
				
			||||||
 | 
					    language       = 'en'
 | 
				
			||||||
 | 
					    encoding       = 'utf-8'
 | 
				
			||||||
 | 
					    remove_javascript = True
 | 
				
			||||||
 | 
					    masthead_title = u'The '+ paperli_tag +' Daily'
 | 
				
			||||||
 | 
					    timefmt        = '[%y/%m/%d]'
 | 
				
			||||||
 | 
					    base_url     = 'http://paper.li'
 | 
				
			||||||
 | 
					    index          = base_url+'/tag/'+paperli_tag
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def parse_index(self):
 | 
				
			||||||
 | 
					        # get topics
 | 
				
			||||||
 | 
					        topics = []
 | 
				
			||||||
 | 
					        soup   = self.index_to_soup(self.index)
 | 
				
			||||||
 | 
					        topics_lists = soup.find('div',attrs={'class':'paper-nav-bottom'})
 | 
				
			||||||
 | 
					        for item in topics_lists.findAll('li', attrs={'class':""}):
 | 
				
			||||||
 | 
					            itema = item.find('a',href=True)
 | 
				
			||||||
 | 
					            topics.append({'title': itema.string, 'url': itema['href']})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        #get feeds
 | 
				
			||||||
 | 
					        feeds = []
 | 
				
			||||||
 | 
					        for topic in topics:
 | 
				
			||||||
 | 
					            newsarticles = []
 | 
				
			||||||
 | 
					            soup   = self.index_to_soup(''.join([self.base_url, topic['url'] ]))
 | 
				
			||||||
 | 
					            topstories = soup.findAll('div',attrs={'class':'yui-u'})
 | 
				
			||||||
 | 
					            for itt in topstories:
 | 
				
			||||||
 | 
					                itema = itt.find('a',href=True,attrs={'class':'ts'})
 | 
				
			||||||
 | 
					                if itema is not None:
 | 
				
			||||||
 | 
					                    itemd = itt.find('div',text=True, attrs={'class':'text'})
 | 
				
			||||||
 | 
					                    newsarticles.append({
 | 
				
			||||||
 | 
					                                      'title'      :itema.string
 | 
				
			||||||
 | 
					                                     ,'date'     :strftime(self.timefmt)
 | 
				
			||||||
 | 
					                                     ,'url'        :itema['href']
 | 
				
			||||||
 | 
					                                     ,'description':itemd.string
 | 
				
			||||||
 | 
					                                    })
 | 
				
			||||||
 | 
					            feeds.append((topic['title'], newsarticles))
 | 
				
			||||||
 | 
					        return feeds
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										36
									
								
								resources/recipes/uninohimitu.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								resources/recipes/uninohimitu.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,36 @@
 | 
				
			|||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					http://ameblo.jp/sauta19/
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class UniNoHimituKichiBlog(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    title          = u'Uni secret base'
 | 
				
			||||||
 | 
					    __author__     = 'Hiroshi Miura'
 | 
				
			||||||
 | 
					    oldest_article = 2
 | 
				
			||||||
 | 
					    publication_type = 'blog'
 | 
				
			||||||
 | 
					    max_articles_per_feed = 20
 | 
				
			||||||
 | 
					    description    = 'Japanese famous Cat blog'
 | 
				
			||||||
 | 
					    publisher      = ''
 | 
				
			||||||
 | 
					    category       = 'cat, pet, japan'
 | 
				
			||||||
 | 
					    language       = 'ja'
 | 
				
			||||||
 | 
					    encoding      = 'utf-8'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    feeds          = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/sauta19/rss20.xml')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def parse_feeds(self):
 | 
				
			||||||
 | 
					        feeds = BasicNewsRecipe.parse_feeds(self)
 | 
				
			||||||
 | 
					        for curfeed in feeds:
 | 
				
			||||||
 | 
					            delList = []
 | 
				
			||||||
 | 
					            for a,curarticle in enumerate(curfeed.articles):
 | 
				
			||||||
 | 
					                if re.search(r'rssad.jp', curarticle.url):
 | 
				
			||||||
 | 
					                    delList.append(curarticle)
 | 
				
			||||||
 | 
					            if len(delList)>0:
 | 
				
			||||||
 | 
					                for d in delList:
 | 
				
			||||||
 | 
					                    index = curfeed.articles.index(d)
 | 
				
			||||||
 | 
					                    curfeed.articles[index:index+1] = []
 | 
				
			||||||
 | 
					        return feeds
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user