recipe: add paper.li recipes

2025-12-04 20:25:01 -05:00 · 2010-12-12 12:56:52 +09:00 · 2010-12-12 12:56:52 +09:00 · a43274e55a
commit a43274e55a
parent ee5e7abe0b
2 changed files with 117 additions and 0 deletions
--- a/resources/recipes/paperli.recipe
+++ b/resources/recipes/paperli.recipe
@ -0,0 +1,58 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+paperli
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import strftime
+import re, sys
+
+class paperli(BasicNewsRecipe):
+#-------------------please change here ----------------
+    paperli_tag = 'osm'
+    title          = u'The # osm Daily - paperli'
+#-------------------------------------------------------------
+    base_url     = 'http://paper.li'
+    index          = '/tag/'+paperli_tag+'/~list'
+
+    __author__     = 'Hiroshi Miura'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    description    = 'paper.li page'
+    publisher      = 'paper.li'
+    category       = 'paper.li'
+    language       = 'en'
+    encoding       = 'utf-8'
+    remove_javascript = True
+    timefmt        = '[%y/%m/%d]'
+
+    def parse_index(self):
+        feeds = []
+        newsarticles = []
+        topic = 'HEADLINE'
+
+        #for pages
+        page = self.index
+        while True:
+            soup = self.index_to_soup(''.join([self.base_url,page]))
+            for itt in soup.findAll('div',attrs={'class':'yui-u'}):
+                itema = itt.find('a',href=True,attrs={'class':'ts'})
+                if itema is not None:
+                    itemd = itt.find('div',text=True, attrs={'class':'text'})
+                    newsarticles.append({
+                                      'title'      :itema.string
+                                     ,'date'     :strftime(self.timefmt)
+                                     ,'url'        :itema['href']
+                                     ,'description':itemd.string
+                                    })
+
+            nextpage = soup.find('div',attrs={'class':'pagination_top'}).find('li', attrs={'class':'next'})
+            if nextpage is not None:
+                page = nextpage.find('a', href=True)['href']
+            else:
+                break
+
+        feeds.append((topic, newsarticles))
+        return feeds
+
--- a/resources/recipes/paperli_topic.recipe
+++ b/resources/recipes/paperli_topic.recipe
@ -0,0 +1,59 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+paperli
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import strftime
+import re
+
+class paperli_topics(BasicNewsRecipe):
+#-------------------please change here ----------------
+    paperli_tag = 'wikileaks'
+    title          = u'The # wikileaks Daily - paperli'
+#-------------------------------------------------------------
+    __author__     = 'Hiroshi Miura'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    description    = 'paper.li page about '+ paperli_tag
+    publisher      = 'paper.li'
+    category       = 'paper.li'
+    language       = 'en'
+    encoding       = 'utf-8'
+    remove_javascript = True
+    masthead_title = u'The '+ paperli_tag +' Daily'
+    timefmt        = '[%y/%m/%d]'
+    base_url     = 'http://paper.li'          
+    index          = base_url+'/tag/'+paperli_tag
+
+
+    def parse_index(self):
+
+        # get topics
+        topics = []
+        soup   = self.index_to_soup(self.index)
+        topics_lists = soup.find('div',attrs={'class':'paper-nav-bottom'})
+        for item in topics_lists.findAll('li', attrs={'class':""}):
+            itema = item.find('a',href=True)
+            topics.append({'title': itema.string, 'url': itema['href']})
+
+        #get feeds
+        feeds = []
+        for topic in topics:
+            newsarticles = []
+            soup   = self.index_to_soup(''.join([self.base_url, topic['url'] ]))
+            topstories = soup.findAll('div',attrs={'class':'yui-u'})
+            for itt in topstories:
+                itema = itt.find('a',href=True,attrs={'class':'ts'})
+                if itema is not None:
+                    itemd = itt.find('div',text=True, attrs={'class':'text'})
+                    newsarticles.append({
+                                      'title'      :itema.string
+                                     ,'date'     :strftime(self.timefmt)
+                                     ,'url'        :itema['href']
+                                     ,'description':itemd.string
+                                    })
+            feeds.append((topic['title'], newsarticles))
+        return feeds
+