mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Read It Later
This commit is contained in:
parent
072ce4c5b7
commit
55a11e5037
@ -1,61 +1,65 @@
|
|||||||
"""
|
"""
|
||||||
Pocket Calibre Recipe v1.3
|
Pocket Calibre Recipe v1.4
|
||||||
"""
|
"""
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import urllib2
|
from string import Template
|
||||||
import urllib
|
|
||||||
import json
|
import json
|
||||||
import operator
|
import operator
|
||||||
import tempfile
|
|
||||||
import re
|
import re
|
||||||
|
import tempfile
|
||||||
|
import urllib
|
||||||
|
import urllib2
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '''
|
__copyright__ = '''
|
||||||
2010, Darko Miletic <darko.miletic at gmail.com>
|
2010, Darko Miletic <darko.miletic at gmail.com>
|
||||||
2011, Przemyslaw Kryger <pkryger at gmail.com>
|
2011, Przemyslaw Kryger <pkryger at gmail.com>
|
||||||
2012, tBunnyMan <Wag That Tail At Me dot com>
|
2012-2013, tBunnyMan <Wag That Tail At Me dot com>
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
class Pocket(BasicNewsRecipe):
|
class Pocket(BasicNewsRecipe):
|
||||||
title = 'Pocket'
|
title = 'Pocket'
|
||||||
__author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
|
__author__ = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
|
||||||
description = '''Personalized news feeds. Go to getpocket.com to setup up \
|
description = '''Personalized news feeds. Go to getpocket.com to setup up
|
||||||
your news. This version displays pages of articles from \
|
your news. This version displays pages of articles from
|
||||||
oldest to newest, with max & minimum counts, and marks articles \
|
oldest to newest, with max & minimum counts, and marks
|
||||||
read after downloading.'''
|
articles read after downloading.'''
|
||||||
publisher = 'getpocket.com'
|
publisher = 'getpocket.com'
|
||||||
category = 'news, custom'
|
category = 'news, custom'
|
||||||
|
|
||||||
|
#Settings people change
|
||||||
max_articles_per_feed = 50
|
max_articles_per_feed = 50
|
||||||
minimum_articles = 10
|
minimum_articles = 10
|
||||||
#Set this to False for testing
|
mark_as_read_after_dl = True # Set this to False for testing
|
||||||
mark_as_read_after_dl = False
|
sort_method = 'oldest' # MUST be either 'oldest' or 'newest'
|
||||||
#MUST be either 'oldest' or 'newest'
|
# To filter by tag this needs to be a single tag in quotes; IE 'calibre'
|
||||||
sort_method = 'oldest'
|
|
||||||
#To filter by tag this needs to be a single tag in quotes; IE 'calibre'
|
|
||||||
only_pull_tag = None
|
only_pull_tag = None
|
||||||
|
|
||||||
#You don't want to change anything under here unless you REALLY know what you are doing
|
#You don't want to change anything under here unless you REALLY know what you are doing
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
articles_are_obfuscated = True
|
articles_are_obfuscated = True
|
||||||
apikey = '19eg0e47pbT32z4793Tf021k99Afl889'
|
apikey = '19eg0e47pbT32z4793Tf021k99Afl889'
|
||||||
index_url = u'http://getpocket.com'
|
index_url = u'http://getpocket.com'
|
||||||
ajax_url = u'http://getpocket.com/a/x/getArticle.php'
|
read_api_url = index_url + u'/v3/get'
|
||||||
read_api_url = index_url + u'/v3/get'
|
modify_api_url = index_url + u'/v3/send'
|
||||||
modify_api_url = index_url + u'/v3/send'
|
legacy_login_url = index_url + u'/l' # We use this to cheat oAuth
|
||||||
legacy_login_url = index_url + u'/l' # We use this to cheat oAuth
|
articles = []
|
||||||
articles = []
|
|
||||||
|
|
||||||
def get_browser(self, *args, **kwargs):
|
def get_browser(self, *args, **kwargs):
|
||||||
"""
|
"""
|
||||||
We need to pretend to be a recent version of safari for the mac to prevent User-Agent checks
|
We need to pretend to be a recent version of safari for the mac to
|
||||||
Pocket api requires username and password so fail loudly if it's missing from the config.
|
prevent User-Agent checks. Pocket api requires username and password so
|
||||||
|
fail loudly if it's missing from the config.
|
||||||
"""
|
"""
|
||||||
br = BasicNewsRecipe.get_browser(self,
|
br = BasicNewsRecipe.get_browser(self,
|
||||||
user_agent='Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-us) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4')
|
user_agent='Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; \
|
||||||
|
en-us) AppleWebKit/533.19.4 (KHTML, like Gecko) \
|
||||||
|
Version/5.0.3 Safari/533.19.4')
|
||||||
if self.username is not None and self.password is not None:
|
if self.username is not None and self.password is not None:
|
||||||
br.open(self.legacy_login_url)
|
br.open(self.legacy_login_url)
|
||||||
br.select_form(nr=0)
|
br.select_form(nr=0)
|
||||||
@ -63,7 +67,7 @@ class Pocket(BasicNewsRecipe):
|
|||||||
br['password'] = self.password
|
br['password'] = self.password
|
||||||
br.submit()
|
br.submit()
|
||||||
else:
|
else:
|
||||||
self.user_error("This Recipe requires authentication, please configured user & pass")
|
self.user_error("This Recipe requires authentication")
|
||||||
return br
|
return br
|
||||||
|
|
||||||
def get_auth_uri(self):
|
def get_auth_uri(self):
|
||||||
@ -71,21 +75,20 @@ class Pocket(BasicNewsRecipe):
|
|||||||
uri = ""
|
uri = ""
|
||||||
uri = u'{0}&apikey={1!s}'.format(uri, self.apikey)
|
uri = u'{0}&apikey={1!s}'.format(uri, self.apikey)
|
||||||
if self.username is None or self.password is None:
|
if self.username is None or self.password is None:
|
||||||
self.user_error("Username or password is blank. Pocket no longer supports blank passwords")
|
self.user_error("Username or password is blank.")
|
||||||
else:
|
else:
|
||||||
uri = u'{0}&username={1!s}'.format(uri, self.username)
|
uri = u'{0}&username={1!s}'.format(uri, self.username)
|
||||||
uri = u'{0}&password={1!s}'.format(uri, self.password)
|
uri = u'{0}&password={1!s}'.format(uri, self.password)
|
||||||
return uri
|
return uri
|
||||||
|
|
||||||
def get_pull_articles_uri(self):
|
def get_pull_articles_uri(self):
|
||||||
"""Return the part of the uri that has all of the get request settings"""
|
|
||||||
uri = ""
|
uri = ""
|
||||||
uri = u'{0}&state={1}'.format(uri, u'unread') # TODO This could be modded to allow pulling archives
|
uri = u'{0}&state={1}'.format(uri, u'unread')
|
||||||
uri = u'{0}&contentType={1}'.format(uri, u'article') # TODO This COULD return images too
|
uri = u'{0}&contentType={1}'.format(uri, u'article')
|
||||||
uri = u'{0}&sort={1}'.format(uri, self.sort_method)
|
uri = u'{0}&sort={1}'.format(uri, self.sort_method)
|
||||||
uri = u'{0}&count={1!s}'.format(uri, self.max_articles_per_feed)
|
uri = u'{0}&count={1!s}'.format(uri, self.max_articles_per_feed)
|
||||||
if self.only_pull_tag is not None:
|
if self.only_pull_tag is not None:
|
||||||
uri = u'{0}tag={1}'.format(uri, self.only_pull_tag)
|
uri = u'{0}&tag={1}'.format(uri, self.only_pull_tag)
|
||||||
return uri
|
return uri
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
@ -100,11 +103,12 @@ class Pocket(BasicNewsRecipe):
|
|||||||
response = urllib2.urlopen(request)
|
response = urllib2.urlopen(request)
|
||||||
pocket_feed = json.load(response)['list']
|
pocket_feed = json.load(response)['list']
|
||||||
except urllib2.HTTPError as e:
|
except urllib2.HTTPError as e:
|
||||||
self.log.exception("Pocket returned an error: {0}\nurl: {1}".format(e, fetch_url))
|
self.log.exception("Pocket returned an error: {0}".format(e.info()))
|
||||||
return []
|
return []
|
||||||
except urllib2.URLError as e:
|
except urllib2.URLError as e:
|
||||||
self.log.exception("Unable to connect to getpocket.com's api: {0}\nurl: {1}".format(e, fetch_url))
|
self.log.exception("Unable to connect to getpocket.com's api: {0}\nurl: {1}".format(e, fetch_url))
|
||||||
return []
|
return []
|
||||||
|
|
||||||
if len(pocket_feed) < self.minimum_articles:
|
if len(pocket_feed) < self.minimum_articles:
|
||||||
self.mark_as_read_after_dl = False
|
self.mark_as_read_after_dl = False
|
||||||
self.user_error("Only {0} articles retrieved, minimum_articles not reached".format(len(pocket_feed)))
|
self.user_error("Only {0} articles retrieved, minimum_articles not reached".format(len(pocket_feed)))
|
||||||
@ -120,39 +124,65 @@ class Pocket(BasicNewsRecipe):
|
|||||||
'sort': pocket_article[1]['sort_id']
|
'sort': pocket_article[1]['sort_id']
|
||||||
})
|
})
|
||||||
self.articles = sorted(self.articles, key=operator.itemgetter('sort'))
|
self.articles = sorted(self.articles, key=operator.itemgetter('sort'))
|
||||||
print self.articles
|
|
||||||
return [("My Pocket Articles for {0}".format(strftime('[%I:%M %p]')), self.articles)]
|
return [("My Pocket Articles for {0}".format(strftime('[%I:%M %p]')), self.articles)]
|
||||||
|
|
||||||
def get_obfuscated_article(self, url):
|
def get_textview(self, url):
|
||||||
|
"""
|
||||||
|
Since Pocket's v3 API they removed access to textview. They also
|
||||||
|
redesigned their page to make it much harder to scrape their textview.
|
||||||
|
We need to pull the article, retrieve the formcheck id, then use it
|
||||||
|
to query for the json version
|
||||||
|
This function will break when pocket hates us
|
||||||
|
"""
|
||||||
|
ajax_url = self.index_url + u'/a/x/getArticle.php'
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
formcheck_script_tag = soup.find('script', text=re.compile("formCheck"))
|
fc_tag = soup.find('script', text=re.compile("formCheck"))
|
||||||
form_check = formcheck_script_tag.split("=")[1].replace("'", "").replace(";", "").strip()
|
fc_id = re.search(r"\'([\d\w]+)\'", fc_tag).group(1)
|
||||||
article_id = url.split("/")[-1]
|
article_id = url.split("/")[-1]
|
||||||
data = urllib.urlencode({'itemId': article_id, 'formCheck': form_check})
|
data = urllib.urlencode({'itemId': article_id, 'formCheck': fc_id})
|
||||||
response = self.browser.open(self.ajax_url, data)
|
try:
|
||||||
article_json = json.load(response)['article']['article']
|
response = self.browser.open(ajax_url, data)
|
||||||
|
except urllib2.HTTPError as e:
|
||||||
|
self.log.exception("unable to get textview {0}".format(e.info()))
|
||||||
|
raise e
|
||||||
|
return json.load(response)['article']
|
||||||
|
|
||||||
|
def get_obfuscated_article(self, url):
|
||||||
|
"""
|
||||||
|
Our get_textview returns parsed json so prettify it to something well
|
||||||
|
parsed by calibre.
|
||||||
|
"""
|
||||||
|
article = self.get_textview(url)
|
||||||
|
template = Template('<h1>$title</h1>\
|
||||||
|
$img\
|
||||||
|
<div class="body">$body</div>')
|
||||||
|
try:
|
||||||
|
image = '<img src="{0}" \>'.format(article['images']['1']['src'])
|
||||||
|
except:
|
||||||
|
image = ''
|
||||||
with tempfile.NamedTemporaryFile(delete=False) as tf:
|
with tempfile.NamedTemporaryFile(delete=False) as tf:
|
||||||
tf.write(article_json)
|
tf.write(template.safe_substitute(
|
||||||
|
title=article['title'],
|
||||||
|
img=image,
|
||||||
|
body=article['article']
|
||||||
|
))
|
||||||
return tf.name
|
return tf.name
|
||||||
|
|
||||||
def mark_as_read(self, mark_list):
|
def mark_as_read(self, mark_list):
|
||||||
formatted_list = []
|
actions_list = []
|
||||||
for article_id in mark_list:
|
for article_id in mark_list:
|
||||||
formatted_list.append({
|
actions_list.append({
|
||||||
'action': 'archive',
|
'action': 'archive',
|
||||||
'item_id': article_id
|
'item_id': article_id
|
||||||
})
|
})
|
||||||
command = {
|
mark_read_url = u'{0}?actions={1}{2}'.format(
|
||||||
'actions': formatted_list
|
|
||||||
}
|
|
||||||
mark_read_url = u'{0}?{1}'.format(
|
|
||||||
self.modify_api_url,
|
self.modify_api_url,
|
||||||
|
json.dumps(actions_list, separators=(',', ':')),
|
||||||
self.get_auth_uri()
|
self.get_auth_uri()
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
request = urllib2.Request(mark_read_url, json.dumps(command))
|
request = urllib2.Request(mark_read_url)
|
||||||
response = urllib2.urlopen(request)
|
urllib2.urlopen(request)
|
||||||
print u'response = {0}'.format(response.info())
|
|
||||||
except urllib2.HTTPError as e:
|
except urllib2.HTTPError as e:
|
||||||
self.log.exception('Pocket returned an error while archiving articles: {0}'.format(e))
|
self.log.exception('Pocket returned an error while archiving articles: {0}'.format(e))
|
||||||
return []
|
return []
|
||||||
@ -162,7 +192,7 @@ class Pocket(BasicNewsRecipe):
|
|||||||
|
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
if self.mark_as_read_after_dl:
|
if self.mark_as_read_after_dl:
|
||||||
self.mark_as_read([x[1]['item_id'] for x in self.articles])
|
self.mark_as_read([x['item_id'] for x in self.articles])
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -174,7 +204,7 @@ class Pocket(BasicNewsRecipe):
|
|||||||
try:
|
try:
|
||||||
from calibre.ebooks import calibre_cover
|
from calibre.ebooks import calibre_cover
|
||||||
title = self.title if isinstance(self.title, unicode) else \
|
title = self.title if isinstance(self.title, unicode) else \
|
||||||
self.title.decode('utf-8', 'replace')
|
self.title.decode('utf-8', 'replace')
|
||||||
date = strftime(self.timefmt)
|
date = strftime(self.timefmt)
|
||||||
time = strftime('[%I:%M %p]')
|
time = strftime('[%I:%M %p]')
|
||||||
img_data = calibre_cover(title, date, time)
|
img_data = calibre_cover(title, date, time)
|
||||||
@ -192,4 +222,4 @@ class Pocket(BasicNewsRecipe):
|
|||||||
self.log.exception(error_message)
|
self.log.exception(error_message)
|
||||||
raise RuntimeError(error_message)
|
raise RuntimeError(error_message)
|
||||||
|
|
||||||
# vim:ft=python
|
# vim:ft=python tabstop=8 expandtab shiftwidth=4 softtabstop=4
|
||||||
|
Loading…
x
Reference in New Issue
Block a user