Merge branch 'fix-pocket-recipe' of https://github.com/grdryn/calibre

This commit is contained in:
Kovid Goyal 2021-12-12 08:04:10 +05:30
commit 4ccbc01125
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -1,5 +1,5 @@
"""
Pocket Calibre Recipe v1.4
Pocket Calibre Recipe v1.5
"""
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
@ -39,7 +39,7 @@ class Pocket(BasicNewsRecipe):
# Settings people change
oldest_article = 7.0
max_articles_per_feed = 50
minimum_articles = 10
minimum_articles = 1
mark_as_read_after_dl = True # Set this to False for testing
sort_method = 'oldest' # MUST be either 'oldest' or 'newest'
# To filter by tag this needs to be a single tag in quotes; e.g. 'calibre'
@ -49,7 +49,7 @@ class Pocket(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
needs_subscription = True
articles_are_obfuscated = True
articles_are_obfuscated = False
apikey = '19eg0e47pbT32z4793Tf021k99Afl889'
index_url = u'https://getpocket.com'
read_api_url = index_url + u'/v3/get'
@ -118,7 +118,7 @@ class Pocket(BasicNewsRecipe):
'item_id': pocket_article[0],
'title': pocket_article[1]['resolved_title'],
'date': pocket_article[1]['time_updated'],
'url': u'{0}/a/read/{1}'.format(self.index_url, pocket_article[0]),
'url': pocket_article[1]['resolved_url'],
'real_url': pocket_article[1]['resolved_url'],
'description': pocket_article[1]['excerpt'],
'sort': pocket_article[1]['sort_id']
@ -126,49 +126,6 @@ class Pocket(BasicNewsRecipe):
self.articles = sorted(self.articles, key=operator.itemgetter('sort'))
return [("My Pocket Articles for {0}".format(strftime('[%I:%M %p]')), self.articles)]
def get_textview(self, url):
    """
    Fetch the JSON "text view" of a Pocket article.

    Since Pocket's v3 API they removed access to textview. They also
    redesigned their page to make it much harder to scrape their textview.
    We need to pull the article page, retrieve the formCheck id embedded in
    one of its scripts, then use it to query for the JSON version.

    This function will break when pocket hates us.

    :param url: article page URL; its last '/'-separated segment is sent
                as the ``itemId`` of the AJAX request.
    :return: the ``'article'`` member of the decoded JSON response.
    :raises ValueError: if no formCheck id can be located in the page.
    :raises HTTPError: re-raised (after logging) if the AJAX request fails.
    """
    ajax_url = self.index_url + u'/a/x/getArticle.php'
    soup = self.index_to_soup(url)

    fc_tag = soup.find('script', text=re.compile("formCheck"))
    if fc_tag is None:
        raise ValueError(
            "no formCheck script found in {0}".format(url))

    # re.search() only accepts str/bytes. Presumably find() can hand back a
    # parser node object rather than plain text, so stringify it first.
    fc_match = re.search(r"formCheck = \'([\d\w]+)\';", str(fc_tag))
    if fc_match is None:
        raise ValueError(
            "unable to extract formCheck id from {0}".format(url))
    fc_id = fc_match.group(1)

    article_id = url.split("/")[-1]
    data = urlencode({'itemId': article_id, 'formCheck': fc_id})
    try:
        response = self.browser.open(ajax_url, data)
    except HTTPError as e:
        self.log.exception("unable to get textview {0}".format(e.info()))
        # Bare raise keeps the original traceback intact.
        raise
    return json.load(response)['article']
def get_obfuscated_article(self, url):
    """
    Render the parsed JSON from get_textview() into a small HTML file that
    calibre can parse.

    Each ``RIL_IMG`` placeholder div in the article body is replaced by an
    ``<img>`` tag pointing at the matching image source, then the title and
    body are written to a temporary file that is left on disk.

    :param url: article URL, passed straight through to get_textview().
    :return: path of the temporary file holding the generated HTML.
    """
    article = self.get_textview(url)

    body = article['article']
    images = article['images']
    # Iterate keys and index back in (as the original did) so that an empty
    # non-dict container is still accepted.
    for key in images:
        image = images[key]
        placeholder = '<div id="RIL_IMG_{0}" class="RIL_IMG"></div>'.format(
            image['image_id'])
        # NOTE(review): the trailing `\>` looks like a typo for `/>`; kept
        # byte-for-byte so the generated markup does not change.
        imgtag = r'<img src="{0}" \>'.format(image['src'])
        body = body.replace(placeholder, imgtag)

    template = Template('<h1>$title</h1><div class="body">$body</div>')
    html = template.safe_substitute(title=article['title'], body=body)
    # NamedTemporaryFile opens in binary mode by default, so text must be
    # encoded before writing (a str write raises TypeError on Python 3).
    if not isinstance(html, bytes):
        html = html.encode('utf-8')

    with tempfile.NamedTemporaryFile(delete=False) as tf:
        tf.write(html)
    return tf.name
def mark_as_read(self, mark_list):
actions_list = []
for article_id in mark_list: