The Feature by Jose Pinto

2025-11-27 00:35:00 -05:00 · 2013-04-12 21:06:55 +05:30 · 2013-04-12 21:06:55 +05:30 · ece2442f6a
commit ece2442f6a
parent 1096eaaadf
2 changed files with 11 additions and 90 deletions
--- a/recipes/givemesomethingtoread.recipe
+++ b/recipes/givemesomethingtoread.recipe
@ -1,90 +0,0 @@
-import re
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class GiveMeSomethingToRead(BasicNewsRecipe):
-    title          = u'Give Me Something To Read'
-    description    = 'Curation / aggregation of articles on diverse topics'
-    language = 'en'
-    __author__     = 'barty on mobileread.com forum'
-    max_articles_per_feed = 100
-    no_stylesheets = False
-    timefmt        = ' [%a, %d %b, %Y]'
-    oldest_article = 365
-    auto_cleanup   = True
-    INDEX          = 'http://givemesomethingtoread.com'
-    CATEGORIES     = [
-        # comment out categories you don't want
-        # (user friendly name, system name, max number of articles to load)
-        ('The Arts','arts',25),
-        ('Science','science',30),
-        ('Technology','technology',30),
-        ('Politics','politics',20),
-        ('Media','media',30),
-        ('Crime','crime',15),
-        ('Other articles','',10)
-        ]
-
-    def parse_index(self):
-        self.cover_url = 'http://thegretchenshow.files.wordpress.com/2009/12/well-read-cat-small.jpg'
-        feeds = []
-        seen_urls = set([])
-        regex = re.compile( r'http://(www\.)?([^/:]+)', re.I)
-
-        for category in self.CATEGORIES:
-
-            (cat_name, tag, max_articles) = category
-
-            tagurl = '' if tag=='' else '/tagged/'+tag
-            self.log('Reading category:', cat_name)
-
-            articles = []
-            pageno = 1
-
-            while len(articles) < max_articles and pageno < 100:
-
-                page = "%s%s/page/%d" % (self.INDEX, tagurl, pageno) if pageno > 1 else self.INDEX + tagurl
-                pageno += 1
-
-                self.log('\tReading page:', page)
-                try:
-                    soup = self.index_to_soup(page)
-                except:
-                    break
-
-                headers = soup.findAll('h2')
-                if len(headers) == .0:
-                    break
-
-                for header in headers:
-                    atag = header.find('a')
-                    url = atag['href']
-                    # skip promotionals and duplicate
-                    if url.startswith('http://givemesomethingtoread') or url.startswith('/') or url in seen_urls:
-                        continue
-                    seen_urls.add(url)
-                    title = self.tag_to_string(header)
-                    self.log('\tFound article:', title)
-                    #self.log('\t', url)
-                    desc = header.parent.find('blockquote')
-                    desc = self.tag_to_string(desc) if desc else ''
-                    m = regex.match( url)
-                    if m:
-                        desc = "[%s] %s" %  (m.group(2), desc)
-                    #self.log('\t', desc)
-                    date = ''
-                    p = header.parent.previousSibling
-                    # navigate up to find h3, which contains the date
-                    while p:
-                        if hasattr(p,'name') and p.name == 'h3':
-                            date = self.tag_to_string(p)
-                            break
-                        p = p.previousSibling
-                    articles.append({'title':title,'url':url,'description':desc,'date':date})
-                    if len(articles) >= max_articles:
-                        break
-
-            if articles:
-                feeds.append((cat_name, articles))
-
-        return feeds
-
--- a/recipes/the_feature.recipe
+++ b/recipes/the_feature.recipe
@ -0,0 +1,11 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1365777047(BasicNewsRecipe):
+    title          = u'The Feature'
+    __author__ = 'Jose Pinto'
+    language = 'en'
+    oldest_article = 30
+    max_articles_per_feed = 100
+    auto_cleanup = True
+    use_embedded_content = False
+    feeds          = [(u'Latest', u'http://thefeature.net/rss/links')]