Update Philosophy Now

2025-07-09 03:04:10 -04:00 · 2022-06-16 11:56:39 +05:30 · 2022-06-16 11:56:39 +05:30 · 211efd35d1
commit 211efd35d1
parent c0a2656cb2
1 changed file with 52 additions and 53 deletions
--- a/recipes/phillosophy_now.recipe
+++ b/recipes/phillosophy_now.recipe
@@ -1,12 +1,12 @@
-import re
+from calibre.web.feeds.news import BasicNewsRecipe, classes
-from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre import browser
 from collections import OrderedDict
 class PhilosophyNow(BasicNewsRecipe):
    title = 'Philosophy Now'
-    __author__ = 'Rick Shang'
+    __author__ = 'unkn0wn'
    description = '''Philosophy Now is a lively magazine for everyone
    interested in ideas. It isn't afraid to tackle all the major questions of
    life, the universe and everything. Published every two months, it tries to
@@ -15,65 +15,64 @@ class PhilosophyNow(BasicNewsRecipe):
    reading matter for those already ensnared by the muse, such as philosophy
    students and academics.'''
    language = 'en'
-    category = 'news'
+    use_embedded_content = False
    encoding = 'UTF-8'
    keep_only_tags = [dict(attrs={'id': 'fullMainColumn'})]
    remove_tags = [dict(attrs={'class': 'articleTools'})]
    no_javascript = True
    no_stylesheets = True
-    needs_subscription = True
+    remove_javascript = True
    remove_attributes = ['height', 'width', 'style']
    encoding = 'utf-8'
    ignore_duplicate_articles = {'url'}
-    def get_browser(self):
+    keep_only_tags = [classes('article_page')]
-        br = BasicNewsRecipe.get_browser(self)
+    remove_tags = [dict(name='div', attrs={'id':'welcome_box'})]
        br.open('https://philosophynow.org/auth/login')
        br.select_form(name="loginForm")
        br['username'] = self.username
        br['password'] = self.password
        br.submit()
        return br
    def parse_index(self):
-        # Go to the issue
+        soup = self.index_to_soup('https://philosophynow.org/')
-        soup0 = self.index_to_soup('http://philosophynow.org/')
+        div = soup.find('div', attrs={'id': 'aside_issue_cover'})
-        issue = soup0.find('div', attrs={'id': 'navColumn'})
+        url = div.find('a', href=True)['href']
-
+        for issue in div.findAll('div', attrs={'id':'aside_issue_text'}):
-        # Find date & cover
+            self.log('Downloading issue:', self.tag_to_string(issue).strip())
-        cover = issue.find('div', attrs={'id': 'cover'})
+        cov_url = div.find('img', src=True)['src']
-        date = self.tag_to_string(cover.find('h3')).strip()
+        self.cover_url = 'https://philosophynow.org' + cov_url
-        self.timefmt = u' [%s]' % date
+        soup = self.index_to_soup('https://philosophynow.org' + url)
        img = cover.find('img', src=True)['src']
        self.cover_url = 'http://philosophynow.org' + \
            re.sub('medium', 'large', img)
        issuenum = re.sub('/media/images/covers/medium/issue', '', img)
        issuenum = re.sub('.jpg', '', issuenum)
        # Go to the main body
        current_issue_url = 'http://philosophynow.org/issues/' + issuenum
        soup = self.index_to_soup(current_issue_url)
        div = soup.find('div', attrs={'class': 'contentsColumn'})
        feeds = OrderedDict()
-        for post in div.findAll('h1'):
+        for h2 in soup.findAll('h2', attrs={'class':'article_list_title'}):
            articles = []
-            a = post.find('a', href=True)
+            a = h2.find('a', href=True)
-            if a is not None:
+            url = a['href']
-                url = "http://philosophynow.org" + a['href']
+            url = 'https://philosophynow.org' + url
-                title = self.tag_to_string(a).strip()
+            title = self.tag_to_string(a)
-                s = post.findPrevious('h3')
+            des = h2.find_next_sibling('p')
-                section_title = self.tag_to_string(s).strip()
+            if des:
-                d = post.findNext('h2')
+                desc = self.tag_to_string(des)
-                desc = self.tag_to_string(d).strip()
+            h3 = h2.find_previous_sibling('h3')
-                articles.append({'title': title, 'url': url,
+            section_title = self.tag_to_string(h3).title()
-                                 'description': desc, 'date': ''})
+            self.log('\t', title)
            self.log('\t', desc)
            self.log('\t\t', url)
            articles.append({
                'title': title,
                'url': url,
                'description': desc})
-                if articles:
+            if articles:
-                    if section_title not in feeds:
+                if section_title not in feeds:
-                        feeds[section_title] = []
+                    feeds[section_title] = []
-                    feeds[section_title] += articles
+                feeds[section_title] += articles
        ans = [(key, val) for key, val in feeds.items()]
        return ans
-    def cleanup(self):
+    # PN changes the content it delivers based on cookies, so the
-        self.browser.open('http://philosophynow.org/auth/logout')
+    # following ensures that we send no cookies
    def get_browser(self, *args, **kwargs):
        """Return the recipe object itself in place of a browser.

        All positional and keyword arguments are accepted and ignored.
        Because this class also defines ``open_novisit`` and ``open``, a
        caller that uses the returned object to open URLs ends up in those
        methods.
        """
        return self
    def clone_browser(self, *args, **kwargs):
        """Return whatever ``self.get_browser()`` returns; arguments are ignored."""
        # No copy is made here: this simply delegates to get_browser().
        return self.get_browser()
    def open_novisit(self, *args, **kwargs):
        """Open a request through a newly created browser instance.

        A new ``browser()`` is built on every call, and all positional and
        keyword arguments are forwarded unchanged to its ``open_novisit``.
        The value that call returns is returned as-is.
        """
        # NOTE(review): presumably a fresh browser per request is what keeps
        # cookies from being sent -- confirm against calibre's browser().
        return browser().open_novisit(*args, **kwargs)
    # ``open`` is the same function under a second name.
    open = open_novisit