Update Read It Later

This commit is contained in:
Kovid Goyal 2013-05-26 20:40:29 +05:30
parent 072ce4c5b7
commit 55a11e5037

View File

@ -1,49 +1,50 @@
""" """
Pocket Calibre Recipe v1.3 Pocket Calibre Recipe v1.4
""" """
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import urllib2 from string import Template
import urllib
import json import json
import operator import operator
import tempfile
import re import re
import tempfile
import urllib
import urllib2
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = ''' __copyright__ = '''
2010, Darko Miletic <darko.miletic at gmail.com> 2010, Darko Miletic <darko.miletic at gmail.com>
2011, Przemyslaw Kryger <pkryger at gmail.com> 2011, Przemyslaw Kryger <pkryger at gmail.com>
2012, tBunnyMan <Wag That Tail At Me dot com> 2012-2013, tBunnyMan <Wag That Tail At Me dot com>
''' '''
class Pocket(BasicNewsRecipe): class Pocket(BasicNewsRecipe):
title = 'Pocket' title = 'Pocket'
__author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan' __author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
description = '''Personalized news feeds. Go to getpocket.com to setup up \ description = '''Personalized news feeds. Go to getpocket.com to setup up
your news. This version displays pages of articles from \ your news. This version displays pages of articles from
oldest to newest, with max & minimum counts, and marks articles \ oldest to newest, with max & minimum counts, and marks
read after downloading.''' articles read after downloading.'''
publisher = 'getpocket.com' publisher = 'getpocket.com'
category = 'news, custom' category = 'news, custom'
#Settings people change
max_articles_per_feed = 50 max_articles_per_feed = 50
minimum_articles = 10 minimum_articles = 10
#Set this to False for testing mark_as_read_after_dl = True # Set this to False for testing
mark_as_read_after_dl = False sort_method = 'oldest' # MUST be either 'oldest' or 'newest'
#MUST be either 'oldest' or 'newest'
sort_method = 'oldest'
# To filter by tag this needs to be a single tag in quotes; IE 'calibre' # To filter by tag this needs to be a single tag in quotes; IE 'calibre'
only_pull_tag = None only_pull_tag = None
#You don't want to change anything under here unless you REALLY know what you are doing #You don't want to change anything under here unless you REALLY know what you are doing
no_stylesheets = True no_stylesheets = True
use_embedded_content = False use_embedded_content = False
needs_subscription = True needs_subscription = True
articles_are_obfuscated = True articles_are_obfuscated = True
apikey = '19eg0e47pbT32z4793Tf021k99Afl889' apikey = '19eg0e47pbT32z4793Tf021k99Afl889'
index_url = u'http://getpocket.com' index_url = u'http://getpocket.com'
ajax_url = u'http://getpocket.com/a/x/getArticle.php'
read_api_url = index_url + u'/v3/get' read_api_url = index_url + u'/v3/get'
modify_api_url = index_url + u'/v3/send' modify_api_url = index_url + u'/v3/send'
legacy_login_url = index_url + u'/l' # We use this to cheat oAuth legacy_login_url = index_url + u'/l' # We use this to cheat oAuth
@ -51,11 +52,14 @@ class Pocket(BasicNewsRecipe):
def get_browser(self, *args, **kwargs): def get_browser(self, *args, **kwargs):
""" """
We need to pretend to be a recent version of safari for the mac to prevent User-Agent checks We need to pretend to be a recent version of safari for the mac to
Pocket api requires username and password so fail loudly if it's missing from the config. prevent User-Agent checks. Pocket api requires username and password so
fail loudly if it's missing from the config.
""" """
br = BasicNewsRecipe.get_browser(self, br = BasicNewsRecipe.get_browser(self,
user_agent='Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-us) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4') user_agent='Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; \
en-us) AppleWebKit/533.19.4 (KHTML, like Gecko) \
Version/5.0.3 Safari/533.19.4')
if self.username is not None and self.password is not None: if self.username is not None and self.password is not None:
br.open(self.legacy_login_url) br.open(self.legacy_login_url)
br.select_form(nr=0) br.select_form(nr=0)
@ -63,7 +67,7 @@ class Pocket(BasicNewsRecipe):
br['password'] = self.password br['password'] = self.password
br.submit() br.submit()
else: else:
self.user_error("This Recipe requires authentication, please configured user & pass") self.user_error("This Recipe requires authentication")
return br return br
def get_auth_uri(self): def get_auth_uri(self):
@ -71,21 +75,20 @@ class Pocket(BasicNewsRecipe):
uri = "" uri = ""
uri = u'{0}&apikey={1!s}'.format(uri, self.apikey) uri = u'{0}&apikey={1!s}'.format(uri, self.apikey)
if self.username is None or self.password is None: if self.username is None or self.password is None:
self.user_error("Username or password is blank. Pocket no longer supports blank passwords") self.user_error("Username or password is blank.")
else: else:
uri = u'{0}&username={1!s}'.format(uri, self.username) uri = u'{0}&username={1!s}'.format(uri, self.username)
uri = u'{0}&password={1!s}'.format(uri, self.password) uri = u'{0}&password={1!s}'.format(uri, self.password)
return uri return uri
def get_pull_articles_uri(self): def get_pull_articles_uri(self):
"""Return the part of the uri that has all of the get request settings"""
uri = "" uri = ""
uri = u'{0}&state={1}'.format(uri, u'unread') # TODO This could be modded to allow pulling archives uri = u'{0}&state={1}'.format(uri, u'unread')
uri = u'{0}&contentType={1}'.format(uri, u'article') # TODO This COULD return images too uri = u'{0}&contentType={1}'.format(uri, u'article')
uri = u'{0}&sort={1}'.format(uri, self.sort_method) uri = u'{0}&sort={1}'.format(uri, self.sort_method)
uri = u'{0}&count={1!s}'.format(uri, self.max_articles_per_feed) uri = u'{0}&count={1!s}'.format(uri, self.max_articles_per_feed)
if self.only_pull_tag is not None: if self.only_pull_tag is not None:
uri = u'{0}tag={1}'.format(uri, self.only_pull_tag) uri = u'{0}&tag={1}'.format(uri, self.only_pull_tag)
return uri return uri
def parse_index(self): def parse_index(self):
@ -100,11 +103,12 @@ class Pocket(BasicNewsRecipe):
response = urllib2.urlopen(request) response = urllib2.urlopen(request)
pocket_feed = json.load(response)['list'] pocket_feed = json.load(response)['list']
except urllib2.HTTPError as e: except urllib2.HTTPError as e:
self.log.exception("Pocket returned an error: {0}\nurl: {1}".format(e, fetch_url)) self.log.exception("Pocket returned an error: {0}".format(e.info()))
return [] return []
except urllib2.URLError as e: except urllib2.URLError as e:
self.log.exception("Unable to connect to getpocket.com's api: {0}\nurl: {1}".format(e, fetch_url)) self.log.exception("Unable to connect to getpocket.com's api: {0}\nurl: {1}".format(e, fetch_url))
return [] return []
if len(pocket_feed) < self.minimum_articles: if len(pocket_feed) < self.minimum_articles:
self.mark_as_read_after_dl = False self.mark_as_read_after_dl = False
self.user_error("Only {0} articles retrieved, minimum_articles not reached".format(len(pocket_feed))) self.user_error("Only {0} articles retrieved, minimum_articles not reached".format(len(pocket_feed)))
@ -120,39 +124,65 @@ class Pocket(BasicNewsRecipe):
'sort': pocket_article[1]['sort_id'] 'sort': pocket_article[1]['sort_id']
}) })
self.articles = sorted(self.articles, key=operator.itemgetter('sort')) self.articles = sorted(self.articles, key=operator.itemgetter('sort'))
print self.articles
return [("My Pocket Articles for {0}".format(strftime('[%I:%M %p]')), self.articles)] return [("My Pocket Articles for {0}".format(strftime('[%I:%M %p]')), self.articles)]
def get_obfuscated_article(self, url): def get_textview(self, url):
"""
Since Pocket's v3 API they removed access to textview. They also
redesigned their page to make it much harder to scrape their textview.
We need to pull the article, retrieve the formcheck id, then use it
to query for the json version
This function will break when pocket hates us
"""
ajax_url = self.index_url + u'/a/x/getArticle.php'
soup = self.index_to_soup(url) soup = self.index_to_soup(url)
formcheck_script_tag = soup.find('script', text=re.compile("formCheck")) fc_tag = soup.find('script', text=re.compile("formCheck"))
form_check = formcheck_script_tag.split("=")[1].replace("'", "").replace(";", "").strip() fc_id = re.search(r"\'([\d\w]+)\'", fc_tag).group(1)
article_id = url.split("/")[-1] article_id = url.split("/")[-1]
data = urllib.urlencode({'itemId': article_id, 'formCheck': form_check}) data = urllib.urlencode({'itemId': article_id, 'formCheck': fc_id})
response = self.browser.open(self.ajax_url, data) try:
article_json = json.load(response)['article']['article'] response = self.browser.open(ajax_url, data)
except urllib2.HTTPError as e:
self.log.exception("unable to get textview {0}".format(e.info()))
raise e
return json.load(response)['article']
def get_obfuscated_article(self, url):
"""
Our get_textview returns parsed json so prettify it to something well
parsed by calibre.
"""
article = self.get_textview(url)
template = Template('<h1>$title</h1>\
$img\
<div class="body">$body</div>')
try:
image = '<img src="{0}" \>'.format(article['images']['1']['src'])
except:
image = ''
with tempfile.NamedTemporaryFile(delete=False) as tf: with tempfile.NamedTemporaryFile(delete=False) as tf:
tf.write(article_json) tf.write(template.safe_substitute(
title=article['title'],
img=image,
body=article['article']
))
return tf.name return tf.name
def mark_as_read(self, mark_list): def mark_as_read(self, mark_list):
formatted_list = [] actions_list = []
for article_id in mark_list: for article_id in mark_list:
formatted_list.append({ actions_list.append({
'action': 'archive', 'action': 'archive',
'item_id': article_id 'item_id': article_id
}) })
command = { mark_read_url = u'{0}?actions={1}{2}'.format(
'actions': formatted_list
}
mark_read_url = u'{0}?{1}'.format(
self.modify_api_url, self.modify_api_url,
json.dumps(actions_list, separators=(',', ':')),
self.get_auth_uri() self.get_auth_uri()
) )
try: try:
request = urllib2.Request(mark_read_url, json.dumps(command)) request = urllib2.Request(mark_read_url)
response = urllib2.urlopen(request) urllib2.urlopen(request)
print u'response = {0}'.format(response.info())
except urllib2.HTTPError as e: except urllib2.HTTPError as e:
self.log.exception('Pocket returned an error while archiving articles: {0}'.format(e)) self.log.exception('Pocket returned an error while archiving articles: {0}'.format(e))
return [] return []
@ -162,7 +192,7 @@ class Pocket(BasicNewsRecipe):
def cleanup(self): def cleanup(self):
if self.mark_as_read_after_dl: if self.mark_as_read_after_dl:
self.mark_as_read([x[1]['item_id'] for x in self.articles]) self.mark_as_read([x['item_id'] for x in self.articles])
else: else:
pass pass
@ -192,4 +222,4 @@ class Pocket(BasicNewsRecipe):
self.log.exception(error_message) self.log.exception(error_message)
raise RuntimeError(error_message) raise RuntimeError(error_message)
# vim:ft=python # vim:ft=python tabstop=8 expandtab shiftwidth=4 softtabstop=4