Merge branch 'fix-pocket-recipe' of https://github.com/grdryn/calibre

2025-11-26 16:25:02 -05:00 · 2021-12-12 08:04:10 +05:30 · 2021-12-12 08:04:10 +05:30 · 4ccbc01125
commit 4ccbc01125
parent 2a45519e5d 6590bcd76e
1 changed file with 4 additions and 47 deletions
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@@ -1,5 +1,5 @@
 """
-Pocket Calibre Recipe v1.4
+Pocket Calibre Recipe v1.5
 """
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
@@ -39,7 +39,7 @@ class Pocket(BasicNewsRecipe):
    # Settings people change
    oldest_article = 7.0
    max_articles_per_feed = 50
-    minimum_articles = 10
+    minimum_articles = 1
    mark_as_read_after_dl = True  # Set this to False for testing
    sort_method = 'oldest'  # MUST be either 'oldest' or 'newest'
    # To filter by tag this needs to be a single tag in quotes; IE 'calibre'
@@ -49,7 +49,7 @@ class Pocket(BasicNewsRecipe):
    no_stylesheets = True
    use_embedded_content = False
    needs_subscription = True
-    articles_are_obfuscated = True
+    articles_are_obfuscated = False
    apikey = '19eg0e47pbT32z4793Tf021k99Afl889'
    index_url = u'https://getpocket.com'
    read_api_url = index_url + u'/v3/get'
@@ -118,7 +118,7 @@ class Pocket(BasicNewsRecipe):
                'item_id':      pocket_article[0],
                'title':        pocket_article[1]['resolved_title'],
                'date':         pocket_article[1]['time_updated'],
-                'url':          u'{0}/a/read/{1}'.format(self.index_url, pocket_article[0]),
+                'url':          pocket_article[1]['resolved_url'],
                'real_url':     pocket_article[1]['resolved_url'],
                'description':  pocket_article[1]['excerpt'],
                'sort':         pocket_article[1]['sort_id']
@@ -126,49 +126,6 @@ class Pocket(BasicNewsRecipe):
        self.articles = sorted(self.articles, key=operator.itemgetter('sort'))
        return [("My Pocket Articles for {0}".format(strftime('[%I:%M %p]')), self.articles)]

-    def get_textview(self, url):
-        """
-        Since Pocket's v3 API they removed access to textview. They also
-         redesigned their page to make it much harder to scrape their textview.
-         We need to pull the article, retrieve the formcheck id, then use it
-         to querty for the json version
-        This function will break when pocket hates us
-        """
-        ajax_url = self.index_url + u'/a/x/getArticle.php'
-        soup = self.index_to_soup(url)
-        fc_tag = soup.find('script', text=re.compile("formCheck"))
-        fc_id = re.search(r"formCheck = \'([\d\w]+)\';", fc_tag).group(1)
-        article_id = url.split("/")[-1]
-        data = urlencode({'itemId': article_id, 'formCheck': fc_id})
-        try:
-            response = self.browser.open(ajax_url, data)
-        except HTTPError as e:
-            self.log.exception("unable to get textview {0}".format(e.info()))
-            raise e
-        return json.load(response)['article']
-
-    def get_obfuscated_article(self, url):
-        """
-        Our get_textview returns parsed json so prettify it to something well
-        parsed by calibre.
-        """
-        article = self.get_textview(url)
-        template = Template('<h1>$title</h1><div class="body">$body</div>')
-        with tempfile.NamedTemporaryFile(delete=False) as tf:
-            tmpbody = article['article']
-            for img in article['images']:
-                imgdiv = '<div id="RIL_IMG_{0}" class="RIL_IMG"></div>'.format(
-                    article['images'][img]['image_id'])
-                imgtag = r'<img src="{0}" \>'.format(
-                    article['images'][img]['src'])
-                tmpbody = tmpbody.replace(imgdiv, imgtag)
-
-            tf.write(template.safe_substitute(
-                title=article['title'],
-                body=tmpbody
-            ))
-        return tf.name
-
    def mark_as_read(self, mark_list):
        actions_list = []
        for article_id in mark_list: