calibre/recipes/readitlater.recipe
2013-04-28 11:42:41 +05:30

196 lines
8.1 KiB
Python

"""
Pocket Calibre Recipe v1.3
"""
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import urllib2
import urllib
import json
import operator
import tempfile
import re
__license__ = 'GPL v3'
__copyright__ = '''
2010, Darko Miletic <darko.miletic at gmail.com>
2011, Przemyslaw Kryger <pkryger at gmail.com>
2012, tBunnyMan <Wag That Tail At Me dot com>
'''
class Pocket(BasicNewsRecipe):
title = 'Pocket'
__author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
description = '''Personalized news feeds. Go to getpocket.com to setup up \
your news. This version displays pages of articles from \
oldest to newest, with max & minimum counts, and marks articles \
read after downloading.'''
publisher = 'getpocket.com'
category = 'news, custom'
max_articles_per_feed = 50
minimum_articles = 10
#Set this to False for testing
mark_as_read_after_dl = False
#MUST be either 'oldest' or 'newest'
sort_method = 'oldest'
#To filter by tag this needs to be a single tag in quotes; IE 'calibre'
only_pull_tag = None
#You don't want to change anything under here unless you REALLY know what you are doing
no_stylesheets = True
use_embedded_content = False
needs_subscription = True
articles_are_obfuscated = True
apikey = '19eg0e47pbT32z4793Tf021k99Afl889'
index_url = u'http://getpocket.com'
ajax_url = u'http://getpocket.com/a/x/getArticle.php'
read_api_url = index_url + u'/v3/get'
modify_api_url = index_url + u'/v3/send'
legacy_login_url = index_url + u'/l' # We use this to cheat oAuth
articles = []
def get_browser(self, *args, **kwargs):
"""
We need to pretend to be a recent version of safari for the mac to prevent User-Agent checks
Pocket api requires username and password so fail loudly if it's missing from the config.
"""
br = BasicNewsRecipe.get_browser(self,
user_agent='Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-us) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4')
if self.username is not None and self.password is not None:
br.open(self.legacy_login_url)
br.select_form(nr=0)
br['feed_id'] = self.username
br['password'] = self.password
br.submit()
else:
self.user_error("This Recipe requires authentication, please configured user & pass")
return br
def get_auth_uri(self):
"""Quick function to return the authentication part of the url"""
uri = ""
uri = u'{0}&apikey={1!s}'.format(uri, self.apikey)
if self.username is None or self.password is None:
self.user_error("Username or password is blank. Pocket no longer supports blank passwords")
else:
uri = u'{0}&username={1!s}'.format(uri, self.username)
uri = u'{0}&password={1!s}'.format(uri, self.password)
return uri
def get_pull_articles_uri(self):
"""Return the part of the uri that has all of the get request settings"""
uri = ""
uri = u'{0}&state={1}'.format(uri, u'unread') # TODO This could be modded to allow pulling archives
uri = u'{0}&contentType={1}'.format(uri, u'article') # TODO This COULD return images too
uri = u'{0}&sort={1}'.format(uri, self.sort_method)
uri = u'{0}&count={1!s}'.format(uri, self.max_articles_per_feed)
if self.only_pull_tag is not None:
uri = u'{0}tag={1}'.format(uri, self.only_pull_tag)
return uri
def parse_index(self):
pocket_feed = []
fetch_url = u"{0}?{1}{2}".format(
self.read_api_url,
self.get_auth_uri(),
self.get_pull_articles_uri()
)
try:
request = urllib2.Request(fetch_url)
response = urllib2.urlopen(request)
pocket_feed = json.load(response)['list']
except urllib2.HTTPError as e:
self.log.exception("Pocket returned an error: {0}\nurl: {1}".format(e, fetch_url))
return []
except urllib2.URLError as e:
self.log.exception("Unable to connect to getpocket.com's api: {0}\nurl: {1}".format(e, fetch_url))
return []
if len(pocket_feed) < self.minimum_articles:
self.mark_as_read_after_dl = False
self.user_error("Only {0} articles retrieved, minimum_articles not reached".format(len(pocket_feed)))
for pocket_article in pocket_feed.iteritems():
self.articles.append({
'item_id': pocket_article[0],
'title': pocket_article[1]['resolved_title'],
'date': pocket_article[1]['time_updated'],
'url': u'{0}/a/read/{1}'.format(self.index_url, pocket_article[0]),
'real_url': pocket_article[1]['resolved_url'],
'description': pocket_article[1]['excerpt'],
'sort': pocket_article[1]['sort_id']
})
self.articles = sorted(self.articles, key=operator.itemgetter('sort'))
print self.articles
return [("My Pocket Articles for {0}".format(strftime('[%I:%M %p]')), self.articles)]
def get_obfuscated_article(self, url):
soup = self.index_to_soup(url)
formcheck_script_tag = soup.find('script', text=re.compile("formCheck"))
form_check = formcheck_script_tag.split("=")[1].replace("'", "").replace(";", "").strip()
article_id = url.split("/")[-1]
data = urllib.urlencode({'itemId': article_id, 'formCheck': form_check})
response = self.browser.open(self.ajax_url, data)
article_json = json.load(response)['article']['article']
with tempfile.NamedTemporaryFile(delete=False) as tf:
tf.write(article_json)
return tf.name
def mark_as_read(self, mark_list):
formatted_list = []
for article_id in mark_list:
formatted_list.append({
'action': 'archive',
'item_id': article_id
})
command = {
'actions': formatted_list
}
mark_read_url = u'{0}?{1}'.format(
self.modify_api_url,
self.get_auth_uri()
)
try:
request = urllib2.Request(mark_read_url, json.dumps(command))
response = urllib2.urlopen(request)
print u'response = {0}'.format(response.info())
except urllib2.HTTPError as e:
self.log.exception('Pocket returned an error while archiving articles: {0}'.format(e))
return []
except urllib2.URLError as e:
self.log.exception("Unable to connect to getpocket.com's modify api: {0}".format(e))
return []
def cleanup(self):
if self.mark_as_read_after_dl:
self.mark_as_read([x[1]['item_id'] for x in self.articles])
else:
pass
def default_cover(self, cover_file):
"""
Create a generic cover for recipes that don't have a cover
This override adds time to the cover
"""
try:
from calibre.ebooks import calibre_cover
title = self.title if isinstance(self.title, unicode) else \
self.title.decode('utf-8', 'replace')
date = strftime(self.timefmt)
time = strftime('[%I:%M %p]')
img_data = calibre_cover(title, date, time)
cover_file.write(img_data)
cover_file.flush()
except:
self.log.exception('Failed to generate default cover')
return False
return True
def user_error(self, error_message):
if hasattr(self, 'abort_recipe_processing'):
self.abort_recipe_processing(error_message)
else:
self.log.exception(error_message)
raise RuntimeError(error_message)
# vim:ft=python