From 6590bcd76e398d757a0e708c32e618cd11a1fbfd Mon Sep 17 00:00:00 2001
From: Gerard Ryan <gerard@ryan.lt>
Date: Sat, 11 Dec 2021 22:52:20 +0000
Subject: [PATCH] Fix Pocket/readitlater recipe

This recipe wasn't working (at least for me): it would consistently
fail with the following error when trying to download an article:

```
Traceback (most recent call last):
  File "calibre/utils/threadpool.py", line 100, in run
  File "calibre/web/feeds/news.py", line 1186, in fetch_obfuscated_article
  File "<string>", line 157, in get_obfuscated_article
  File "<string>", line 142, in get_textview
  File "re.py", line 201, in search
TypeError: expected string or bytes-like object
```

I believe this is because Pocket don't allow access to their
"Article View" API by default:
https://getpocket.com/developer/docs/v3/article-view

This change uses the original URL of the article, rather than the
Pocket URL for it (in my browser, those `getpocket.com/a/read/<id>`
URLs seem to just redirect to `getpocket.com/my-list`). I've a
feeling that the old way might have produced cleaner articles (Pocket
cleanup & then Calibre cleanup), but I've never seen it work
successfully.

Alternatively, we could try to convince Pocket to enable that API for
the "app" we use here. I think this usage would qualify, as it's a
"Pocket specific feature" (for Calibre). That might require adopting
their full OAuth flow, rather than using username/password. From
reading their API docs with Calibre in mind, I think that would mean
hosting a web page somewhere that the user opens in their normal web
browser. There they would click a button that redirects them to the
Pocket page to authorize the Calibre app to access their Pocket
account. Pocket would then redirect them back to our web page, where
we'd instruct them to enter the access token into the "password" field
for the recipe in Calibre.
---
 recipes/readitlater.recipe | 51 +++-----------------------------------
 1 file changed, 4 insertions(+), 47 deletions(-)
diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe
index ae3f608801..82ebc0a9de 100644
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@@ -1,5 +1,5 @@
 """
-Pocket Calibre Recipe v1.4
+Pocket Calibre Recipe v1.5
 """
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
@@ -39,7 +39,7 @@ class Pocket(BasicNewsRecipe):
     # Settings people change
     oldest_article = 7.0
     max_articles_per_feed = 50
-    minimum_articles = 10
+    minimum_articles = 1
     mark_as_read_after_dl = True  # Set this to False for testing
     sort_method = 'oldest'  # MUST be either 'oldest' or 'newest'
     # To filter by tag this needs to be a single tag in quotes; IE 'calibre'
@@ -49,7 +49,7 @@ class Pocket(BasicNewsRecipe):
     no_stylesheets = True
     use_embedded_content = False
     needs_subscription = True
-    articles_are_obfuscated = True
+    articles_are_obfuscated = False
     apikey = '19eg0e47pbT32z4793Tf021k99Afl889'
     index_url = u'https://getpocket.com'
     read_api_url = index_url + u'/v3/get'
@@ -118,7 +118,7 @@ class Pocket(BasicNewsRecipe):
                 'item_id':      pocket_article[0],
                 'title':        pocket_article[1]['resolved_title'],
                 'date':         pocket_article[1]['time_updated'],
-                'url':          u'{0}/a/read/{1}'.format(self.index_url, pocket_article[0]),
+                'url':          pocket_article[1]['resolved_url'],
                 'real_url':     pocket_article[1]['resolved_url'],
                 'description':  pocket_article[1]['excerpt'],
                 'sort':         pocket_article[1]['sort_id']
@@ -126,49 +126,6 @@ class Pocket(BasicNewsRecipe):
         self.articles = sorted(self.articles, key=operator.itemgetter('sort'))
         return [("My Pocket Articles for {0}".format(strftime('[%I:%M %p]')), self.articles)]
 
-    def get_textview(self, url):
-        """
-        Since Pocket's v3 API they removed access to textview. They also
-         redesigned their page to make it much harder to scrape their textview.
-         We need to pull the article, retrieve the formcheck id, then use it
-         to querty for the json version
-        This function will break when pocket hates us
-        """
-        ajax_url = self.index_url + u'/a/x/getArticle.php'
-        soup = self.index_to_soup(url)
-        fc_tag = soup.find('script', text=re.compile("formCheck"))
-        fc_id = re.search(r"formCheck = \'([\d\w]+)\';", fc_tag).group(1)
-        article_id = url.split("/")[-1]
-        data = urlencode({'itemId': article_id, 'formCheck': fc_id})
-        try:
-            response = self.browser.open(ajax_url, data)
-        except HTTPError as e:
-            self.log.exception("unable to get textview {0}".format(e.info()))
-            raise e
-        return json.load(response)['article']
-
-    def get_obfuscated_article(self, url):
-        """
-        Our get_textview returns parsed json so prettify it to something well
-        parsed by calibre.
-        """
-        article = self.get_textview(url)
-        template = Template('<h1>$title</h1><div class="body">$body</div>')
-        with tempfile.NamedTemporaryFile(delete=False) as tf:
-            tmpbody = article['article']
-            for img in article['images']:
-                imgdiv = '<div id="RIL_IMG_{0}" class="RIL_IMG"></div>'.format(
-                    article['images'][img]['image_id'])
-                imgtag = r'<img src="{0}" \>'.format(
-                    article['images'][img]['src'])
-                tmpbody = tmpbody.replace(imgdiv, imgtag)
-
-            tf.write(template.safe_substitute(
-                title=article['title'],
-                body=tmpbody
-            ))
-        return tf.name
-
     def mark_as_read(self, mark_list):
         actions_list = []
         for article_id in mark_list: