diff --git a/recipes/givemesomethingtoread.recipe b/recipes/givemesomethingtoread.recipe
deleted file mode 100644
index 09b758536f..0000000000
--- a/recipes/givemesomethingtoread.recipe
+++ /dev/null
@@ -1,90 +0,0 @@
-import re
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class GiveMeSomethingToRead(BasicNewsRecipe):
-    title = u'Give Me Something To Read'
-    description = 'Curation / aggregation of articles on diverse topics'
-    language = 'en'
-    __author__ = 'barty on mobileread.com forum'
-    max_articles_per_feed = 100
-    no_stylesheets = False
-    timefmt = ' [%a, %d %b, %Y]'
-    oldest_article = 365
-    auto_cleanup = True
-    INDEX = 'http://givemesomethingtoread.com'
-    CATEGORIES = [
-        # comment out categories you don't want
-        # (user friendly name, system name, max number of articles to load)
-        ('The Arts','arts',25),
-        ('Science','science',30),
-        ('Technology','technology',30),
-        ('Politics','politics',20),
-        ('Media','media',30),
-        ('Crime','crime',15),
-        ('Other articles','',10)
-    ]
-
-    def parse_index(self):
-        self.cover_url = 'http://thegretchenshow.files.wordpress.com/2009/12/well-read-cat-small.jpg'
-        feeds = []
-        seen_urls = set([])
-        regex = re.compile( r'http://(www\.)?([^/:]+)', re.I)
-
-        for category in self.CATEGORIES:
-
-            (cat_name, tag, max_articles) = category
-
-            tagurl = '' if tag=='' else '/tagged/'+tag
-            self.log('Reading category:', cat_name)
-
-            articles = []
-            pageno = 1
-
-            while len(articles) < max_articles and pageno < 100:
-
-                page = "%s%s/page/%d" % (self.INDEX, tagurl, pageno) if pageno > 1 else self.INDEX + tagurl
-                pageno += 1
-
-                self.log('\tReading page:', page)
-                try:
-                    soup = self.index_to_soup(page)
-                except:
-                    break
-
-                headers = soup.findAll('h2')
-                if len(headers) == 0:
-                    break
-
-                for header in headers:
-                    atag = header.find('a')
-                    url = atag['href']
-                    # skip promotionals and duplicates
-                    if url.startswith('http://givemesomethingtoread') or url.startswith('/') or url in seen_urls:
-                        continue
-                    seen_urls.add(url)
-                    title = self.tag_to_string(header)
-                    self.log('\tFound article:', title)
-                    #self.log('\t', url)
-                    desc = header.parent.find('blockquote')
-                    desc = self.tag_to_string(desc) if desc else ''
-                    m = regex.match( url)
-                    if m:
-                        desc = "[%s] %s" % (m.group(2), desc)
-                    #self.log('\t', desc)
-                    date = ''
-                    p = header.parent.previousSibling
-                    # navigate up to find h3, which contains the date
-                    while p:
-                        if hasattr(p,'name') and p.name == 'h3':
-                            date = self.tag_to_string(p)
-                            break
-                        p = p.previousSibling
-                    articles.append({'title':title,'url':url,'description':desc,'date':date})
-                    if len(articles) >= max_articles:
-                        break
-
-            if articles:
-                feeds.append((cat_name, articles))
-
-        return feeds
-
diff --git a/recipes/the_feature.recipe b/recipes/the_feature.recipe
new file mode 100644
index 0000000000..efdeb5120d
--- /dev/null
+++ b/recipes/the_feature.recipe
@@ -0,0 +1,11 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1365777047(BasicNewsRecipe):
+    title = u'The Feature'
+    __author__ = 'Jose Pinto'
+    language = 'en'
+    oldest_article = 30
+    max_articles_per_feed = 100
+    auto_cleanup = True
+    use_embedded_content = False
+    feeds = [(u'Latest', u'http://thefeature.net/rss/links')]