diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index 6f48ac116b..60e77ae558 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -1,61 +1,65 @@ """ -Pocket Calibre Recipe v1.3 +Pocket Calibre Recipe v1.4 """ from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe -import urllib2 -import urllib +from string import Template import json import operator -import tempfile import re +import tempfile +import urllib +import urllib2 -__license__ = 'GPL v3' + +__license__ = 'GPL v3' __copyright__ = ''' 2010, Darko Miletic 2011, Przemyslaw Kryger -2012, tBunnyMan +2012-2013, tBunnyMan ''' class Pocket(BasicNewsRecipe): - title = 'Pocket' - __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan' - description = '''Personalized news feeds. Go to getpocket.com to setup up \ - your news. This version displays pages of articles from \ - oldest to newest, with max & minimum counts, and marks articles \ - read after downloading.''' - publisher = 'getpocket.com' - category = 'news, custom' + title = 'Pocket' + __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan' + description = '''Personalized news feeds. Go to getpocket.com to setup up + your news. This version displays pages of articles from + oldest to newest, with max & minimum counts, and marks + articles read after downloading.''' + publisher = 'getpocket.com' + category = 'news, custom' + + #Settings people change max_articles_per_feed = 50 minimum_articles = 10 - #Set this to False for testing - mark_as_read_after_dl = False - #MUST be either 'oldest' or 'newest' - sort_method = 'oldest' - #To filter by tag this needs to be a single tag in quotes; IE 'calibre' + mark_as_read_after_dl = True # Set this to False for testing + sort_method = 'oldest' # MUST be either 'oldest' or 'newest' + # To filter by tag this needs to be a single tag in quotes; IE 'calibre' only_pull_tag = None - #You don't want to change anything under here unless you REALLY know what you are doing - no_stylesheets = True - use_embedded_content = False - needs_subscription = True + #You don't want to change anything under + no_stylesheets = True + use_embedded_content = False + needs_subscription = True articles_are_obfuscated = True - apikey = '19eg0e47pbT32z4793Tf021k99Afl889' - index_url = u'http://getpocket.com' - ajax_url = u'http://getpocket.com/a/x/getArticle.php' - read_api_url = index_url + u'/v3/get' - modify_api_url = index_url + u'/v3/send' - legacy_login_url = index_url + u'/l' # We use this to cheat oAuth - articles = [] + apikey = '19eg0e47pbT32z4793Tf021k99Afl889' + index_url = u'http://getpocket.com' + read_api_url = index_url + u'/v3/get' + modify_api_url = index_url + u'/v3/send' + legacy_login_url = index_url + u'/l' # We use this to cheat oAuth + articles = [] def get_browser(self, *args, **kwargs): """ - We need to pretend to be a recent version of safari for the mac to prevent User-Agent checks - Pocket api requires username and password so fail loudly if it's missing from the config. + We need to pretend to be a recent version of safari for the mac to + prevent User-Agent checks Pocket api requires username and password so + fail loudly if it's missing from the config. """ br = BasicNewsRecipe.get_browser(self, - user_agent='Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-us) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4') + user_agent='Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; \ + en-us) AppleWebKit/533.19.4 (KHTML, like Gecko) \ + Version/5.0.3 Safari/533.19.4') if self.username is not None and self.password is not None: br.open(self.legacy_login_url) br.select_form(nr=0) @@ -63,7 +67,7 @@ class Pocket(BasicNewsRecipe): br['password'] = self.password br.submit() else: - self.user_error("This Recipe requires authentication, please configured user & pass") + self.user_error("This Recipe requires authentication") return br def get_auth_uri(self): @@ -71,21 +75,20 @@ class Pocket(BasicNewsRecipe): uri = "" uri = u'{0}&apikey={1!s}'.format(uri, self.apikey) if self.username is None or self.password is None: - self.user_error("Username or password is blank. Pocket no longer supports blank passwords") + self.user_error("Username or password is blank.") else: uri = u'{0}&username={1!s}'.format(uri, self.username) uri = u'{0}&password={1!s}'.format(uri, self.password) return uri def get_pull_articles_uri(self): - """Return the part of the uri that has all of the get request settings""" uri = "" - uri = u'{0}&state={1}'.format(uri, u'unread') # TODO This could be modded to allow pulling archives - uri = u'{0}&contentType={1}'.format(uri, u'article') # TODO This COULD return images too + uri = u'{0}&state={1}'.format(uri, u'unread') + uri = u'{0}&contentType={1}'.format(uri, u'article') uri = u'{0}&sort={1}'.format(uri, self.sort_method) uri = u'{0}&count={1!s}'.format(uri, self.max_articles_per_feed) if self.only_pull_tag is not None: - uri = u'{0}tag={1}'.format(uri, self.only_pull_tag) + uri = u'{0}&tag={1}'.format(uri, self.only_pull_tag) return uri def parse_index(self): @@ -100,11 +103,12 @@ class Pocket(BasicNewsRecipe): response = urllib2.urlopen(request) pocket_feed = json.load(response)['list'] except urllib2.HTTPError as e: - self.log.exception("Pocket returned an error: {0}\nurl: {1}".format(e, fetch_url)) + self.log.exception("Pocket returned an error: {0}".format(e.info())) return [] except urllib2.URLError as e: self.log.exception("Unable to connect to getpocket.com's api: {0}\nurl: {1}".format(e, fetch_url)) return [] + if len(pocket_feed) < self.minimum_articles: self.mark_as_read_after_dl = False self.user_error("Only {0} articles retrieved, minimum_articles not reached".format(len(pocket_feed))) @@ -120,39 +124,65 @@ class Pocket(BasicNewsRecipe): 'sort': pocket_article[1]['sort_id'] }) self.articles = sorted(self.articles, key=operator.itemgetter('sort')) - print self.articles return [("My Pocket Articles for {0}".format(strftime('[%I:%M %p]')), self.articles)] - def get_obfuscated_article(self, url): + def get_textview(self, url): + """ + Since Pocket's v3 API they removed access to textview. They also + redesigned their page to make it much harder to scrape their textview. + We need to pull the article, retrieve the formcheck id, then use it + to querty for the json version + This function will break when pocket hates us + """ + ajax_url = self.index_url + u'/a/x/getArticle.php' soup = self.index_to_soup(url) - formcheck_script_tag = soup.find('script', text=re.compile("formCheck")) - form_check = formcheck_script_tag.split("=")[1].replace("'", "").replace(";", "").strip() + fc_tag = soup.find('script', text=re.compile("formCheck")) + fc_id = re.search(r"\'([\d\w]+)\'", fc_tag).group(1) article_id = url.split("/")[-1] - data = urllib.urlencode({'itemId': article_id, 'formCheck': form_check}) - response = self.browser.open(self.ajax_url, data) - article_json = json.load(response)['article']['article'] + data = urllib.urlencode({'itemId': article_id, 'formCheck': fc_id}) + try: + response = self.browser.open(ajax_url, data) + except urllib2.HTTPError as e: + self.log.exception("unable to get textview {0}".format(e.info())) + raise e + return json.load(response)['article'] + + def get_obfuscated_article(self, url): + """ + Our get_textview returns parsed json so prettify it to something well + parsed by calibre. + """ + article = self.get_textview(url) + template = Template('

$title

\ + $img\ +
$body
') + try: + image = ''.format(article['images']['1']['src']) + except: + image = '' with tempfile.NamedTemporaryFile(delete=False) as tf: - tf.write(article_json) + tf.write(template.safe_substitute( + title=article['title'], + img=image, + body=article['article'] + )) return tf.name def mark_as_read(self, mark_list): - formatted_list = [] + actions_list = [] for article_id in mark_list: - formatted_list.append({ + actions_list.append({ 'action': 'archive', 'item_id': article_id }) - command = { - 'actions': formatted_list - } - mark_read_url = u'{0}?{1}'.format( + mark_read_url = u'{0}?actions={1}{2}'.format( self.modify_api_url, + json.dumps(actions_list, separators=(',', ':')), self.get_auth_uri() ) try: - request = urllib2.Request(mark_read_url, json.dumps(command)) - response = urllib2.urlopen(request) - print u'response = {0}'.format(response.info()) + request = urllib2.Request(mark_read_url) + urllib2.urlopen(request) except urllib2.HTTPError as e: self.log.exception('Pocket returned an error while archiving articles: {0}'.format(e)) return [] @@ -162,7 +192,7 @@ class Pocket(BasicNewsRecipe): def cleanup(self): if self.mark_as_read_after_dl: - self.mark_as_read([x[1]['item_id'] for x in self.articles]) + self.mark_as_read([x['item_id'] for x in self.articles]) else: pass @@ -174,7 +204,7 @@ class Pocket(BasicNewsRecipe): try: from calibre.ebooks import calibre_cover title = self.title if isinstance(self.title, unicode) else \ - self.title.decode('utf-8', 'replace') + self.title.decode('utf-8', 'replace') date = strftime(self.timefmt) time = strftime('[%I:%M %p]') img_data = calibre_cover(title, date, time) @@ -192,4 +222,4 @@ class Pocket(BasicNewsRecipe): self.log.exception(error_message) raise RuntimeError(error_message) -# vim:ft=python +# vim:ft=python tabstop=8 expandtab shiftwidth=4 softtabstop=4