From 5c0cd6e0705ac9772267576c63c9af657a02e9e9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 19 Apr 2012 10:34:12 +0530 Subject: [PATCH] ... --- recipes/readitlater.recipe | 224 +++++++++++++++++-------------------- 1 file changed, 100 insertions(+), 124 deletions(-) diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index b195aa2cdc..8ed7c43610 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -1,155 +1,131 @@ -''' -readitlaterlist.com -''' +""" +Pocket Calibre Recipe v1.0 +""" __license__ = 'GPL v3' __copyright__ = ''' 2010, Darko Miletic 2011, Przemyslaw Kryger -2011, Keith Callenberg 2012, tBunnyMan -2012, Alayn Gortazar ''' -from contextlib import closing +from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag -import json -import urllib -import urllib2 -class Readitlater(BasicNewsRecipe): - title = 'Read It Later' - __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan, Alayn Gortazar' - description = '''Personalized news feeds. Go to readitlaterlist.com to - setup up your news. Fill in your account - username, and optionally you can add your password.''' - publisher = 'readitlaterlist.com' + +class Pocket(BasicNewsRecipe): + title = 'Pocket' + __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan' + description = '''Personalized news feeds. Go to getpocket.com to setup up \ + your news. This version displays pages of articles from \ + oldest to newest, with max & minimum counts, and marks articles \ + read after downloading.''' + publisher = 'getpocket.com' category = 'news, custom' oldest_article = 7 max_articles_per_feed = 50 - minimum_articles = 1 + minimum_articles = 10 + mark_as_read_after_dl = True no_stylesheets = True use_embedded_content = False needs_subscription = True - KEY = '8e0p5f19A74emL3a47goP87m69d4VF8b' - API_TEXT_INDEX = 'https://text.readitlaterlist.com/' - API_INDEX = 'https://readitlaterlist.com/' - INDEX = 'https://getpocket.com/' + INDEX = u'http://getpocket.com' LOGIN = INDEX + u'/l' - enhanced_version = True + readList = [] - articles = [] - - feeds = [(u'Unread articles' , INDEX)] def get_browser(self): br = BasicNewsRecipe.get_browser() - if self.enhanced_version: - if self.username is not None: - br.open(self.LOGIN) - br.select_form(nr=0) - br['feed_id'] = self.username - if self.password is not None: - br['password'] = self.password - br.submit() + if self.username is not None: + br.open(self.LOGIN) + br.select_form(nr=0) + br['feed_id'] = self.username + if self.password is not None: + br['password'] = self.password + br.submit() return br - def get_auth_params(self): - auth_params = 'apikey=' + self.KEY - if self.username is not None: - auth_params += '&username=' + self.username - if self.password is not None: - auth_params += '&password=' + self.password - return auth_params + def get_feeds(self): + self.report_progress(0, ('Fetching list of pages...')) + lfeeds = [] + i = 1 + feedurl = self.INDEX + u'/unread/1' + while True: + title = u'Unread articles, page ' + str(i) + lfeeds.insert(0, (title, feedurl)) + self.report_progress(0, ('Got ') + str(i) + (' pages')) + i += 1 + soup = self.index_to_soup(feedurl) + ritem = soup.find('a', attrs={'id':'next', 'class':'active'}) + if ritem is None: + break + feedurl = self.INDEX + ritem['href'] + return lfeeds def parse_index(self): - index = self.API_INDEX + 'v2/get?' + self.get_auth_params() - index += '&state=unread' - index += '&count=' + str(self.max_articles_per_feed) - - open_func = getattr(self.browser, 'open_novisit', self.browser.open) - with closing(open_func(index)) as f: - results = f.read() - if not results: - raise RuntimeError('Could not fetch index!') - - json_obj = json.loads(results) - - if len(json_obj['list']) >= self.minimum_articles: - for item in json_obj['list'].iteritems(): - # TODO: This URL should be modified by it's corresponding API call in a future. - # Actually is not possible to get the Article View potential throught an API call (12/04/2012) - if self.enhanced_version: - dataurl = self.INDEX + 'a/x/getArticle.php?itemId=' + item[1]['item_id'] + totalfeeds = [] + articlesToGrab = self.max_articles_per_feed + lfeeds = self.get_feeds() + for feedobj in lfeeds: + if articlesToGrab < 1: + break + feedtitle, feedurl = feedobj + self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + articles = [] + soup = self.index_to_soup(feedurl) + ritem = soup.find('ul', attrs={'id':'list'}) + for item in reversed(ritem.findAll('li')): + if articlesToGrab < 1: + break else: - dataurl = self.API_TEXT_INDEX + 'v2/text?' + self.get_auth_params() - dataurl += '&url=' + item[1]['url'] - self.articles.append({ - 'title':item[1]['title'], - 'date':item[1]['time_added'], - 'url':dataurl, - 'description':item[1]['item_id'], - 'real_url':item[1]['url'] - }) - else: + articlesToGrab -= 1 + description = '' + atag = item.find('a', attrs={'class':'text'}) + if atag and atag.has_key('href'): + url = self.INDEX + atag['href'] + title = self.tag_to_string(item.div) + date = strftime(self.timefmt) + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':description + }) + readLink = item.find('a', attrs={'class':'check'})['href'] + self.readList.append(readLink) + totalfeeds.append((feedtitle, articles)) + if len(self.readList) < self.minimum_articles: raise Exception("Not enough articles in RIL! Change minimum_articles or add more.") + return totalfeeds - return [('Unread', self.articles)] - - def preprocess_raw_html(self, raw_html, url): - # get article and image urls from json object - if self.enhanced_version: - json_obj = json.loads(raw_html) - self.images = {} - for image in json_obj['article']['images']: - self.images[image] = json_obj['article']['images'][image]['src'] - title = '

{title}

'.format(title=json_obj['article']['title']) - link = '

Original: {url}

'.format(url=json_obj['article']['resolvedUrl']) - html = link + title + json_obj['article']['article'] - else: - html = raw_html - return html + '
' - - def preprocess_html(self, soup): - # Insert images on RIL_IMG_# divs - if self.enhanced_version: - for key, url in self.images.iteritems(): - imgtag = Tag(soup, 'img') - imgtag['src'] = url - div = soup.find('div', attrs={'id':'RIL_IMG_' + key}) - div.insert(0, imgtag) - return soup + def mark_as_read(self, markList): + br = self.get_browser() + for link in markList: + url = self.INDEX + link + print 'Marking read: ', url + response = br.open(url) + print response.info() def cleanup(self): - # From a list of urls, create a human-readable JSON string - # suitable for passing to the ReadItLater SEND::READ method. - self.markAsRead(self.createMarkList(self.articles)) - - def createMarkList(self, articles): - urls = [] - for article in self.articles: - urls.append(article['real_url']) - items = ['"%d": {"url": "%s"}' % (n,u) for n,u in enumerate(urls)] - s = '{\n %s\n}' % (',\n '.join(items),) - return s - - def markAsRead(self, markList): - url = self.API_INDEX + 'v2/send' - values = { - 'username' : self.username, - 'password' : self.password, - 'apikey' : self.KEY, - 'read' : markList - } - data = urllib.urlencode(values) + if self.mark_as_read_after_dl: + self.mark_as_read(self.readList) + else: + pass + def default_cover(self, cover_file): + ''' + Create a generic cover for recipes that don't have a cover + This override adds time to the cover + ''' try: - print 'Calling ReadItLater API...' - request = urllib2.Request(url,data) - response = urllib2.urlopen(request) - response.read() - print 'response =', response.code - except urllib2.HTTPError as e: - print 'The server could not fulfill the request: ', e - except urllib2.URLError as e: - print 'The call to ReadItLater API failed:', e + from calibre.ebooks import calibre_cover + title = self.title if isinstance(self.title, unicode) else \ + self.title.decode(preferred_encoding, 'replace') + date = strftime(self.timefmt) + time = strftime('[%I:%M %p]') + img_data = calibre_cover(title, date, time) + cover_file.write(img_data) + cover_file.flush() + except: + self.log.exception('Failed to generate default cover') + return False + return True \ No newline at end of file